From 758e97c1e5f02869dbae81a7a428d270a016464e Mon Sep 17 00:00:00 2001
From: jhendersonHDF <jhenderson@hdfgroup.org>
Date: Thu, 24 Feb 2022 10:04:59 -0600
Subject: Parallel Compression improvements (#1302)

---
 CMakeLists.txt                        |   12 +-
 MANIFEST                              |    2 +
 config/cmake/H5pubconf.h.in           |    3 +
 configure.ac                          |   17 +-
 examples/CMakeLists.txt               |   42 +-
 examples/CMakeTests.cmake             |   40 +-
 examples/Makefile.am                  |    6 +-
 examples/ph5_filtered_writes.c        |  482 ++
 examples/ph5_filtered_writes_no_sel.c |  366 ++
 examples/ph5example.c                 |    8 +-
 release_docs/RELEASE.txt              |   40 +-
 src/H5Dchunk.c                        |  378 +-
 src/H5Dint.c                          |   55 +-
 src/H5Dio.c                           |  128 +-
 src/H5Dmpio.c                         | 5397 ++++++++++++++-----
 src/H5Dpkg.h                          |   17 +-
 src/H5Dselect.c                       |  187 +-
 src/H5FDmpio.c                        |   44 +-
 src/H5Fmpi.c                          |   64 +
 src/H5Fprivate.h                      |    3 +-
 src/H5mpi.c                           |  233 +
 src/H5private.h                       |   30 +
 src/H5public.h                        |    7 +-
 testpar/t_2Gio.c                      |  275 +-
 testpar/t_dset.c                      |  277 +-
 testpar/t_filters_parallel.c          | 9394 ++++++++++++++++++++-------------
 testpar/t_filters_parallel.h          |  117 +-
 testpar/testphdf5.h                   |    4 -
 28 files changed, 11790 insertions(+), 5838 deletions(-)
 create mode 100644 examples/ph5_filtered_writes.c
 create mode 100644 examples/ph5_filtered_writes_no_sel.c

diff --git a/CMakeLists.txt b/CMakeLists.txt
index fedce44..cad378b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -708,10 +708,14 @@ if (HDF5_ENABLE_PARALLEL)
 
     # Used by Parallel Compression feature
     set (PARALLEL_FILTERED_WRITES ON)
-    CHECK_SYMBOL_EXISTS (MPI_Mprobe "mpi.h" H5_HAVE_MPI_Mprobe)
-    CHECK_SYMBOL_EXISTS (MPI_Imrecv "mpi.h" H5_HAVE_MPI_Imrecv)
-    if (NOT H5_HAVE_MPI_Mprobe OR NOT H5_HAVE_MPI_Imrecv)
-      message (WARNING "The MPI_Mprobe and/or MPI_Imrecv functions could not be located.
+    CHECK_SYMBOL_EXISTS (MPI_Ibarrier "mpi.h" H5_HAVE_MPI_Ibarrier)
+    CHECK_SYMBOL_EXISTS (MPI_Issend "mpi.h" H5_HAVE_MPI_Issend)
+    CHECK_SYMBOL_EXISTS (MPI_Iprobe "mpi.h" H5_HAVE_MPI_Iprobe)
+    CHECK_SYMBOL_EXISTS (MPI_Irecv "mpi.h" H5_HAVE_MPI_Irecv)
+    if (H5_HAVE_MPI_Ibarrier AND H5_HAVE_MPI_Issend AND H5_HAVE_MPI_Iprobe AND H5_HAVE_MPI_Irecv)
+      set (H5_HAVE_PARALLEL_FILTERED_WRITES 1)
+    else ()
+      message (WARNING "The MPI_Ibarrier/MPI_Issend/MPI_Iprobe/MPI_Irecv functions could not be located.
                Parallel writes of filtered data will be disabled.")
       set (PARALLEL_FILTERED_WRITES OFF)
     endif ()
diff --git a/MANIFEST b/MANIFEST
index 0aa9957..37b84d4 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -333,6 +333,8 @@
 ./examples/h5_ref2reg_deprec.c
 ./examples/h5_shared_mesg.c
 ./examples/ph5example.c
+./examples/ph5_filtered_writes.c
+./examples/ph5_filtered_writes_no_sel.c
 ./examples/h5_vds.c
 ./examples/h5_vds-exc.c
 ./examples/h5_vds-exclim.c
diff --git a/config/cmake/H5pubconf.h.in b/config/cmake/H5pubconf.h.in
index 1f7f4b1..4956c97 100644
--- a/config/cmake/H5pubconf.h.in
+++ b/config/cmake/H5pubconf.h.in
@@ -265,6 +265,9 @@
 /* Define if we have parallel support */
 #cmakedefine H5_HAVE_PARALLEL @H5_HAVE_PARALLEL@
 
+/* Define if we have support for writing to filtered datasets in parallel */
+#cmakedefine H5_HAVE_PARALLEL_FILTERED_WRITES @H5_HAVE_PARALLEL_FILTERED_WRITES@
+
 /* Define if both pread and pwrite exist. */
 #cmakedefine H5_HAVE_PREADWRITE @H5_HAVE_PREADWRITE@
 
diff --git a/configure.ac b/configure.ac
index 4cf329c..55468bf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2987,11 +2987,11 @@ if test -n "$PARALLEL"; then
   fi
 
   ## ----------------------------------------------------------------------
-  ## Check for the MPI-3 functions necessary for the Parallel Compression
+  ## Check for the MPI functions necessary for the Parallel Compression
   ## feature. If these are not present, issue a warning that Parallel
   ## Compression will be disabled.
   ##
-  AC_MSG_CHECKING([for MPI_Mprobe and MPI_Imrecv functions])
+  AC_MSG_CHECKING([for MPI_Ibarrier/MPI_Issend/MPI_Iprobe/MPI_Irecv functions])
 
   AC_LINK_IFELSE(
       [AC_LANG_PROGRAM(
@@ -2999,16 +2999,19 @@ if test -n "$PARALLEL"; then
               #include <mpi.h>
           ]],
           [[
-              MPI_Message message;
+              int flag;
               MPI_Init(0, (void *) 0);
-              MPI_Mprobe(0, 0, 0, &message, (void *) 0);
-              MPI_Imrecv((void *) 0, 0, 0, (void *) 0, (void *) 0);
+              MPI_Ibarrier(0, (void *) 0);
+              MPI_Issend((void *) 0, 0, 0, 0, 0, 0, (void *) 0);
+              MPI_Iprobe(0, 0, 0, &flag, (void *) 0);
+              MPI_Irecv((void *) 0, 0, 0, 0, 0, 0, (void *) 0);
           ]]
       )],
       [AC_MSG_RESULT([yes])
-       PARALLEL_FILTERED_WRITES=yes],
+       PARALLEL_FILTERED_WRITES=yes
+       AC_DEFINE([HAVE_PARALLEL_FILTERED_WRITES], [1], [Define if we have support for writing to filtered datasets in parallel])],
       [AC_MSG_RESULT([no])
-       AC_MSG_WARN([A simple MPI program using the MPI_Mprobe and MPI_Imrecv functions could not be compiled and linked.
+       AC_MSG_WARN([A simple MPI program using the MPI_Ibarrier, MPI_Issend, MPI_Iprobe and MPI_Irecv functions could not be compiled and linked.
                     Parallel writes of filtered data will be disabled.])
        PARALLEL_FILTERED_WRITES=no]
   )
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 9ab870f..3f329c1 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -42,6 +42,14 @@ set (examples
     h5_vds-percival-unlim-maxmin
 )
 
+if (H5_HAVE_PARALLEL)
+  set (parallel_examples
+    ph5example
+    ph5_filtered_writes
+    ph5_filtered_writes_no_sel
+  )
+endif ()
+
 foreach (example ${examples})
   add_executable (${example} ${HDF5_EXAMPLES_SOURCE_DIR}/${example}.c)
   target_include_directories (${example} PRIVATE "${HDF5_SRC_DIR};${HDF5_SRC_BINARY_DIR};$<$<BOOL:${HDF5_ENABLE_PARALLEL}>:${MPI_C_INCLUDE_DIRS}>")
@@ -63,23 +71,25 @@ foreach (example ${examples})
 endforeach ()
 
 if (H5_HAVE_PARALLEL)
-  add_executable (ph5example ${HDF5_EXAMPLES_SOURCE_DIR}/ph5example.c)
-  target_include_directories (ph5example PRIVATE "${HDF5_SRC_DIR};${HDF5_SRC_BINARY_DIR};$<$<BOOL:${HDF5_ENABLE_PARALLEL}>:${MPI_C_INCLUDE_DIRS}>")
-  if (NOT BUILD_SHARED_LIBS)
-    TARGET_C_PROPERTIES (ph5example STATIC)
-    target_link_libraries (ph5example PRIVATE ${HDF5_LIB_TARGET} ${MPI_C_LIBRARIES})
-  else ()
-    TARGET_C_PROPERTIES (ph5example SHARED)
-    target_link_libraries (ph5example PRIVATE ${HDF5_LIBSH_TARGET} ${MPI_C_LIBRARIES})
-  endif ()
-  set_target_properties (ph5example PROPERTIES FOLDER examples)
+  foreach (parallel_example ${parallel_examples})
+    add_executable (${parallel_example} ${HDF5_EXAMPLES_SOURCE_DIR}/${parallel_example}.c)
+    target_include_directories (${parallel_example} PRIVATE "${HDF5_SRC_DIR};${HDF5_SRC_BINARY_DIR};$<$<BOOL:${HDF5_ENABLE_PARALLEL}>:${MPI_C_INCLUDE_DIRS}>")
+    if (NOT BUILD_SHARED_LIBS)
+      TARGET_C_PROPERTIES (${parallel_example} STATIC)
+      target_link_libraries (${parallel_example} PRIVATE ${HDF5_LIB_TARGET} ${MPI_C_LIBRARIES})
+    else ()
+      TARGET_C_PROPERTIES (${parallel_example} SHARED)
+      target_link_libraries (${parallel_example} PRIVATE ${HDF5_LIBSH_TARGET} ${MPI_C_LIBRARIES})
+    endif ()
+    set_target_properties (${parallel_example} PROPERTIES FOLDER examples)
 
-  #-----------------------------------------------------------------------------
-  # Add Target to clang-format
-  #-----------------------------------------------------------------------------
-  if (HDF5_ENABLE_FORMATTERS)
-    clang_format (HDF5_EXAMPLES_ph5example_FORMAT ph5example)
-  endif ()
+    #-----------------------------------------------------------------------------
+    # Add Target to clang-format
+    #-----------------------------------------------------------------------------
+    if (HDF5_ENABLE_FORMATTERS)
+      clang_format (HDF5_EXAMPLES_${parallel_example}_FORMAT ${parallel_example})
+    endif ()
+  endforeach ()
 endif ()
 
 if (BUILD_TESTING AND HDF5_TEST_EXAMPLES)
diff --git a/examples/CMakeTests.cmake b/examples/CMakeTests.cmake
index 70142c8..3e24ba0 100644
--- a/examples/CMakeTests.cmake
+++ b/examples/CMakeTests.cmake
@@ -101,22 +101,26 @@ if (H5_HAVE_PARALLEL AND HDF5_TEST_PARALLEL AND NOT WIN32)
   # Ensure that 24 is a multiple of the number of processes.
   # The number 24 corresponds to SPACE1_DIM1 and SPACE1_DIM2 defined in ph5example.c
   math(EXPR NUMPROCS "24 / ((24 + ${MPIEXEC_MAX_NUMPROCS} - 1) / ${MPIEXEC_MAX_NUMPROCS})")
-  if (HDF5_ENABLE_USING_MEMCHECKER)
-    add_test (NAME MPI_TEST_EXAMPLES-ph5example COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${NUMPROCS} ${MPIEXEC_PREFLAGS} $<TARGET_FILE:ph5example> ${MPIEXEC_POSTFLAGS})
-  else ()
-    add_test (NAME MPI_TEST_EXAMPLES-ph5example COMMAND "${CMAKE_COMMAND}"
-        -D "TEST_PROGRAM=${MPIEXEC_EXECUTABLE};${MPIEXEC_NUMPROC_FLAG};${NUMPROCS};${MPIEXEC_PREFLAGS};$<TARGET_FILE:ph5example>;${MPIEXEC_POSTFLAGS}"
-        -D "TEST_ARGS:STRING="
-        -D "TEST_EXPECT=0"
-        -D "TEST_OUTPUT=ph5example.out"
-        -D "TEST_REFERENCE:STRING=PHDF5 tests finished with no errors"
-        -D "TEST_FILTER:STRING=PHDF5 tests finished with no errors"
-        -D "TEST_FOLDER=${PROJECT_BINARY_DIR}"
-        -P "${HDF_RESOURCES_EXT_DIR}/grepTest.cmake"
-    )
-  endif ()
-  if (last_test)
-    set_tests_properties (MPI_TEST_EXAMPLES-ph5example PROPERTIES DEPENDS ${last_test})
-  endif ()
-  set (last_test "MPI_TEST_EXAMPLES-ph5example")
+
+  foreach (parallel_example ${parallel_examples})
+    if (HDF5_ENABLE_USING_MEMCHECKER)
+      add_test (NAME MPI_TEST_EXAMPLES-${parallel_example} COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} ${NUMPROCS} ${MPIEXEC_PREFLAGS} $<TARGET_FILE:${parallel_example}> ${MPIEXEC_POSTFLAGS})
+    else ()
+      add_test (NAME MPI_TEST_EXAMPLES-${parallel_example} COMMAND "${CMAKE_COMMAND}"
+          -D "TEST_PROGRAM=${MPIEXEC_EXECUTABLE};${MPIEXEC_NUMPROC_FLAG};${NUMPROCS};${MPIEXEC_PREFLAGS};$<TARGET_FILE:${parallel_example}>;${MPIEXEC_POSTFLAGS}"
+          -D "TEST_ARGS:STRING="
+          -D "TEST_EXPECT=0"
+          -D "TEST_SKIP_COMPARE=TRUE"
+          -D "TEST_OUTPUT=${parallel_example}.out"
+          -D "TEST_REFERENCE:STRING=PHDF5 example finished with no errors"
+          #-D "TEST_FILTER:STRING=PHDF5 tests finished with no errors"
+          -D "TEST_FOLDER=${PROJECT_BINARY_DIR}"
+          -P "${HDF_RESOURCES_EXT_DIR}/grepTest.cmake"
+      )
+    endif ()
+    if (last_test)
+      set_tests_properties (MPI_TEST_EXAMPLES-${parallel_example} PROPERTIES DEPENDS ${last_test})
+    endif ()
+    set (last_test "MPI_TEST_EXAMPLES-${parallel_example}")
+  endforeach ()
 endif ()
diff --git a/examples/Makefile.am b/examples/Makefile.am
index 7b5aa63..161f789 100644
--- a/examples/Makefile.am
+++ b/examples/Makefile.am
@@ -20,7 +20,7 @@
 include $(top_srcdir)/config/commence.am
 
 if BUILD_PARALLEL_CONDITIONAL
-  EXAMPLE_PROG_PARA = ph5example
+  EXAMPLE_PROG_PARA = ph5example ph5_filtered_writes ph5_filtered_writes_no_sel
 endif
 
 INSTALL_SCRIPT_FILES = run-c-ex.sh
@@ -50,7 +50,7 @@ INSTALL_FILES = h5_write.c h5_read.c h5_extend_write.c h5_chunk_read.c h5_compou
             h5_group.c h5_select.c h5_attribute.c h5_mount.c h5_drivers.c \
             h5_reference_deprec.c h5_ref_extern.c h5_ref_compat.c h5_ref2reg_deprec.c \
             h5_extlink.c h5_elink_unix2win.c h5_shared_mesg.c h5_debug_trace.c \
-            ph5example.c \
+            ph5example.c ph5_filtered_writes.c ph5_filtered_writes_no_sel.c \
             h5_vds.c h5_vds-exc.c h5_vds-exclim.c h5_vds-eiger.c h5_vds-simpleIO.c \
             h5_vds-percival.c h5_vds-percival-unlim.c h5_vds-percival-unlim-maxmin.c
 
@@ -119,6 +119,8 @@ h5_reference_deprec: $(srcdir)/h5_reference_deprec.c
 h5_ref2reg_deprec: $(srcdir)/h5_ref2reg_deprec.c
 h5_drivers: $(srcdir)/h5_drivers.c
 ph5example: $(srcdir)/ph5example.c
+ph5_filtered_writes: $(srcdir)/ph5_filtered_writes.c
+ph5_filtered_writes_no_sel: $(srcdir)/ph5_filtered_writes_no_sel.c
 h5_dtransform: $(srcdir)/h5_dtransform.c
 h5_extlink: $(srcdir)/h5_extlink.c $(EXTLINK_DIRS)
 h5_elink_unix2win: $(srcdir)/h5_elink_unix2win.c $(EXTLINK_DIRS)
diff --git a/examples/ph5_filtered_writes.c b/examples/ph5_filtered_writes.c
new file mode 100644
index 0000000..8b55528
--- /dev/null
+++ b/examples/ph5_filtered_writes.c
@@ -0,0 +1,482 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Copyright by The HDF Group.                                               *
+ * All rights reserved.                                                      *
+ *                                                                           *
+ * This file is part of HDF5.  The full HDF5 copyright notice, including     *
+ * terms governing use, modification, and redistribution, is contained in    *
+ * the COPYING file, which can be found at the root of the source code       *
+ * distribution tree, or in https://www.hdfgroup.org/licenses.               *
+ * If you do not have access to either file, you may request a copy from     *
+ * help@hdfgroup.org.                                                        *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/*
+ * Example of using the parallel HDF5 library to write to datasets
+ * with filters applied to them.
+ *
+ * If the HDF5_NOCLEANUP environment variable is set, the file that
+ * this example creates will not be removed as the example finishes.
+ *
+ * A parallel file prefix is needed because, in general, the current
+ * working directory in which compiling is done is not suitable for
+ * parallel I/O, and there is no standard pathname for parallel file
+ * systems. In some cases, the parallel file name may even need some
+ * parallel file type prefix such as: "pfs:/GF/...".  Therefore, this
+ * example parses the HDF5_PARAPREFIX environment variable for a prefix,
+ * if one is needed.
+ */
+
+#include <stdlib.h>
+
+#include "hdf5.h"
+
+#if defined(H5_HAVE_PARALLEL) && defined(H5_HAVE_PARALLEL_FILTERED_WRITES)
+
+#define EXAMPLE_FILE       "ph5_filtered_writes.h5"
+#define EXAMPLE_DSET1_NAME "DSET1"
+#define EXAMPLE_DSET2_NAME "DSET2"
+
+#define EXAMPLE_DSET_DIMS           2
+#define EXAMPLE_DSET_CHUNK_DIM_SIZE 10
+
+/* Dataset datatype */
+#define HDF5_DATATYPE H5T_NATIVE_INT
+typedef int C_DATATYPE;
+
+/* Global variables */
+int mpi_rank, mpi_size;
+
+/*
+ * Install a filter on the given dataset creation property list:
+ * 'deflate' when the library reports it available, Fletcher32 otherwise.
+ */
+static void
+set_filter(hid_t dcpl_id)
+{
+    /* Ask the library whether the 'deflate' filter can be used */
+    htri_t have_deflate = H5Zfilter_avail(H5Z_FILTER_DEFLATE);
+
+    /*
+     * A negative result is treated as a failed query;
+     * the DCPL is left without any filter in that case.
+     */
+    if (have_deflate < 0)
+        return;
+
+    if (have_deflate) {
+        /* Set 'deflate' with a reasonable compression level */
+        H5Pset_deflate(dcpl_id, 6);
+        return;
+    }
+
+    /*
+     * 'deflate' is not available: set the Fletcher32 checksum
+     * filter instead, since it is always available in HDF5
+     */
+    H5Pset_fletcher32(dcpl_id);
+}
+
+/*
+ * Routine to fill a data buffer with data. Assumes
+ * dimension rank is 2 and data is stored contiguous.
+ * Only 'count' sizes the fill; 'start' and 'stride' are unused.
+ */
+static void
+fill_databuf(hsize_t start[], hsize_t count[], hsize_t stride[], C_DATATYPE *data)
+{
+    hsize_t nelmts = count[0] * count[1];
+    hsize_t i;
+
+    (void)start;
+    (void)stride;
+    /* Use MPI rank value for every element of the buffer */
+    for (i = 0; i < nelmts; i++)
+        data[i] = mpi_rank;
+}
+
+/* Remove the file this example created, unless HDF5_NOCLEANUP is set */
+static void
+cleanup(char *filename)
+{
+    /* Skip the delete whenever the environment variable exists */
+    if (getenv(HDF5_NOCLEANUP))
+        return;
+    MPI_File_delete(filename, MPI_INFO_NULL);
+}
+
+/*
+ * Routine to write to a dataset in a fashion
+ * where no chunks in the dataset are written
+ * to by more than 1 MPI rank. This will
+ * generally give the best performance as the
+ * MPI ranks will need the least amount of
+ * inter-process communication.
+ */
+static void
+write_dataset_no_overlap(hid_t file_id, hid_t dxpl_id)
+{
+    C_DATATYPE data[EXAMPLE_DSET_CHUNK_DIM_SIZE][4 * EXAMPLE_DSET_CHUNK_DIM_SIZE];
+    hsize_t    dataset_dims[EXAMPLE_DSET_DIMS];
+    hsize_t    chunk_dims[EXAMPLE_DSET_DIMS];
+    hsize_t    start[EXAMPLE_DSET_DIMS];
+    hsize_t    stride[EXAMPLE_DSET_DIMS];
+    hsize_t    count[EXAMPLE_DSET_DIMS];
+    hid_t      dset_id        = H5I_INVALID_HID;
+    hid_t      dcpl_id        = H5I_INVALID_HID;
+    hid_t      file_dataspace = H5I_INVALID_HID;
+
+    /*
+     * ------------------------------------
+     * Setup Dataset Creation Property List
+     * ------------------------------------
+     */
+
+    dcpl_id = H5Pcreate(H5P_DATASET_CREATE);
+
+    /*
+     * REQUIRED: Dataset chunking must be enabled to
+     *           apply a data filter to the dataset.
+     *           Chunks in the dataset are of size
+     *           EXAMPLE_DSET_CHUNK_DIM_SIZE x EXAMPLE_DSET_CHUNK_DIM_SIZE.
+     */
+    chunk_dims[0] = EXAMPLE_DSET_CHUNK_DIM_SIZE;
+    chunk_dims[1] = EXAMPLE_DSET_CHUNK_DIM_SIZE;
+    H5Pset_chunk(dcpl_id, EXAMPLE_DSET_DIMS, chunk_dims);
+
+    /* Set filter to be applied to created datasets */
+    set_filter(dcpl_id);
+
+    /*
+     * ------------------------------------
+     * Define the dimensions of the dataset
+     * and create it
+     * ------------------------------------
+     */
+
+    /*
+     * Create a dataset composed of 4 chunks
+     * per MPI rank. The first dataset dimension
+     * scales according to the number of MPI ranks.
+     * The second dataset dimension stays fixed
+     * according to the chunk size.
+     */
+    dataset_dims[0] = EXAMPLE_DSET_CHUNK_DIM_SIZE * mpi_size;
+    dataset_dims[1] = 4 * EXAMPLE_DSET_CHUNK_DIM_SIZE;
+
+    file_dataspace = H5Screate_simple(EXAMPLE_DSET_DIMS, dataset_dims, NULL);
+
+    /* Create the dataset */
+    dset_id = H5Dcreate2(file_id, EXAMPLE_DSET1_NAME, HDF5_DATATYPE, file_dataspace, H5P_DEFAULT, dcpl_id,
+                         H5P_DEFAULT);
+
+    /*
+     * ------------------------------------
+     * Setup selection in the dataset for
+     * each MPI rank
+     * ------------------------------------
+     */
+
+    /*
+     * Each MPI rank's selection covers a
+     * single chunk in the first dataset
+     * dimension. Each MPI rank's selection
+     * covers 4 chunks in the second dataset
+     * dimension. This leads to each MPI rank
+     * writing to 4 chunks of the dataset.
+     */
+    start[0]  = mpi_rank * EXAMPLE_DSET_CHUNK_DIM_SIZE;
+    start[1]  = 0;
+    stride[0] = 1;
+    stride[1] = 1;
+    count[0]  = EXAMPLE_DSET_CHUNK_DIM_SIZE;
+    count[1]  = 4 * EXAMPLE_DSET_CHUNK_DIM_SIZE;
+
+    H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, NULL);
+
+    /*
+     * --------------------------------------
+     * Fill data buffer with MPI rank's rank
+     * value to make it easy to see which
+     * part of the dataset each rank wrote to
+     * --------------------------------------
+     */
+
+    fill_databuf(start, count, stride, &data[0][0]);
+
+    /*
+     * ---------------------------------
+     * Write to the dataset collectively
+     * ---------------------------------
+     */
+
+    /* H5S_BLOCK: 'data' is one contiguous block sized to match the file selection */
+    H5Dwrite(dset_id, HDF5_DATATYPE, H5S_BLOCK, file_dataspace, dxpl_id, data);
+
+    /*
+     * --------------
+     * Close HDF5 IDs
+     * --------------
+     */
+
+    H5Sclose(file_dataspace);
+    H5Pclose(dcpl_id);
+    H5Dclose(dset_id);
+}
+
+/*
+ * Routine to write to a dataset in a fashion
+ * where every chunk in the dataset is written
+ * to by every MPI rank. This will generally
+ * give the worst performance as the MPI ranks
+ * will need the most amount of inter-process
+ * communication.
+ */
+static void
+write_dataset_overlap(hid_t file_id, hid_t dxpl_id)
+{
+    C_DATATYPE data[mpi_size][EXAMPLE_DSET_CHUNK_DIM_SIZE];
+    hsize_t    dataset_dims[EXAMPLE_DSET_DIMS];
+    hsize_t    chunk_dims[EXAMPLE_DSET_DIMS];
+    hsize_t    start[EXAMPLE_DSET_DIMS];
+    hsize_t    stride[EXAMPLE_DSET_DIMS];
+    hsize_t    count[EXAMPLE_DSET_DIMS];
+    hid_t      dset_id        = H5I_INVALID_HID;
+    hid_t      dcpl_id        = H5I_INVALID_HID;
+    hid_t      file_dataspace = H5I_INVALID_HID;
+
+    /*
+     * ------------------------------------
+     * Setup Dataset Creation Property List
+     * ------------------------------------
+     */
+
+    dcpl_id = H5Pcreate(H5P_DATASET_CREATE);
+
+    /*
+     * REQUIRED: Dataset chunking must be enabled to
+     *           apply a data filter to the dataset.
+     *           Chunks in the dataset are of size
+     *           mpi_size x EXAMPLE_DSET_CHUNK_DIM_SIZE.
+     */
+    chunk_dims[0] = mpi_size;
+    chunk_dims[1] = EXAMPLE_DSET_CHUNK_DIM_SIZE;
+    H5Pset_chunk(dcpl_id, EXAMPLE_DSET_DIMS, chunk_dims);
+
+    /* Set filter to be applied to created datasets */
+    set_filter(dcpl_id);
+
+    /*
+     * ------------------------------------
+     * Define the dimensions of the dataset
+     * and create it
+     * ------------------------------------
+     */
+
+    /*
+     * Create a dataset composed of N chunks,
+     * where N is the number of MPI ranks. The
+     * first dataset dimension scales according
+     * to the number of MPI ranks. The second
+     * dataset dimension stays fixed according
+     * to the chunk size.
+     */
+    dataset_dims[0] = mpi_size * chunk_dims[0];
+    dataset_dims[1] = EXAMPLE_DSET_CHUNK_DIM_SIZE;
+
+    file_dataspace = H5Screate_simple(EXAMPLE_DSET_DIMS, dataset_dims, NULL);
+
+    /* Create the dataset */
+    dset_id = H5Dcreate2(file_id, EXAMPLE_DSET2_NAME, HDF5_DATATYPE, file_dataspace, H5P_DEFAULT, dcpl_id,
+                         H5P_DEFAULT);
+
+    /*
+     * ------------------------------------
+     * Setup selection in the dataset for
+     * each MPI rank
+     * ------------------------------------
+     */
+
+    /*
+     * Each MPI rank's selection covers
+     * part of every chunk in the first
+     * dimension. Each MPI rank's selection
+     * covers all of every chunk in the
+     * second dimension. This leads to
+     * each MPI rank writing an equal
+     * amount of data to every chunk
+     * in the dataset.
+     */
+    start[0]  = mpi_rank;
+    start[1]  = 0;
+    stride[0] = chunk_dims[0];
+    stride[1] = 1;
+    count[0]  = mpi_size;
+    count[1]  = EXAMPLE_DSET_CHUNK_DIM_SIZE;
+
+    H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, NULL);
+
+    /*
+     * --------------------------------------
+     * Fill data buffer with MPI rank's rank
+     * value to make it easy to see which
+     * part of the dataset each rank wrote to
+     * --------------------------------------
+     */
+
+    fill_databuf(start, count, stride, &data[0][0]);
+
+    /*
+     * ---------------------------------
+     * Write to the dataset collectively
+     * ---------------------------------
+     */
+
+    /* H5S_BLOCK: 'data' is one contiguous block sized to match the file selection */
+    H5Dwrite(dset_id, HDF5_DATATYPE, H5S_BLOCK, file_dataspace, dxpl_id, data);
+
+    /*
+     * --------------
+     * Close HDF5 IDs
+     * --------------
+     */
+
+    H5Sclose(file_dataspace);
+    H5Pclose(dcpl_id);
+    H5Dclose(dset_id);
+}
+
+int
+main(int argc, char **argv)
+{
+    MPI_Comm comm       = MPI_COMM_WORLD;
+    MPI_Info info       = MPI_INFO_NULL;
+    hid_t    file_id    = H5I_INVALID_HID;
+    hid_t    fapl_id    = H5I_INVALID_HID;
+    hid_t    dxpl_id    = H5I_INVALID_HID;
+    char *   par_prefix = NULL;
+    char     filename[PATH_MAX];
+
+    MPI_Init(&argc, &argv);
+    MPI_Comm_size(comm, &mpi_size);
+    MPI_Comm_rank(comm, &mpi_rank);
+
+    /*
+     * ----------------------------------
+     * Start parallel access to HDF5 file
+     * ----------------------------------
+     */
+
+    /* Setup File Access Property List with parallel I/O access */
+    fapl_id = H5Pcreate(H5P_FILE_ACCESS);
+    H5Pset_fapl_mpio(fapl_id, comm, info);
+
+    /*
+     * OPTIONAL: Set collective metadata reads on FAPL to allow
+     *           parallel writes to filtered datasets to perform
+     *           better at scale. While not strictly necessary,
+     *           this is generally recommended.
+     */
+    H5Pset_all_coll_metadata_ops(fapl_id, true);
+
+    /*
+     * OPTIONAL: Set the latest file format version for HDF5 in
+     *           order to gain access to different dataset chunk
+     *           index types and better data encoding methods.
+     *           While not strictly necessary, this is generally
+     *           recommended.
+     */
+    H5Pset_libver_bounds(fapl_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST);
+
+    /* Build "<HDF5_PARAPREFIX>/<EXAMPLE_FILE>", or just EXAMPLE_FILE when no prefix is set */
+    par_prefix = getenv("HDF5_PARAPREFIX");
+
+    snprintf(filename, PATH_MAX, "%s%s%s", par_prefix ? par_prefix : "", par_prefix ? "/" : "", EXAMPLE_FILE);
+
+    /* Create the HDF5 file (H5F_ACC_TRUNC) using the parallel-access FAPL set up above */
+    file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);
+
+    /*
+     * --------------------------------------
+     * Setup Dataset Transfer Property List
+     * with collective I/O
+     * --------------------------------------
+     */
+
+    dxpl_id = H5Pcreate(H5P_DATASET_XFER);
+
+    /*
+     * REQUIRED: Setup collective I/O for the dataset
+     *           write operations. Parallel writes to
+     *           filtered datasets MUST be collective,
+     *           even if some ranks have no data to
+     *           contribute to the write operation.
+     *
+     *           Refer to the 'ph5_filtered_writes_no_sel'
+     *           example to see how to setup a dataset
+     *           write when one or more MPI ranks have
+     *           no data to contribute to the write
+     *           operation.
+     */
+    H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE);
+
+    /*
+     * --------------------------------
+     * Create and write to each dataset
+     * --------------------------------
+     */
+
+    /*
+     * Write to a dataset in a fashion where no
+     * chunks in the dataset are written to by
+     * more than 1 MPI rank. This will generally
+     * give the best performance as the MPI ranks
+     * will need the least amount of inter-process
+     * communication.
+     */
+    write_dataset_no_overlap(file_id, dxpl_id);
+
+    /*
+     * Write to a dataset in a fashion where
+     * every chunk in the dataset is written
+     * to by every MPI rank. This will generally
+     * give the worst performance as the MPI ranks
+     * will need the most amount of inter-process
+     * communication.
+     */
+    write_dataset_overlap(file_id, dxpl_id);
+
+    /*
+     * ------------------
+     * Close all HDF5 IDs
+     * ------------------
+     */
+
+    H5Pclose(dxpl_id);
+    H5Pclose(fapl_id);
+    H5Fclose(file_id);
+
+    printf("PHDF5 example finished with no errors\n");
+
+    /*
+     * ------------------------------------
+     * Cleanup created HDF5 file and finish
+     * ------------------------------------
+     */
+
+    cleanup(filename);
+
+    MPI_Finalize();
+
+    return 0;
+}
+
+#else
+/* Stub built when parallel HDF5 or parallel filtered writes are unavailable */
+int
+main(void)
+{
+    fputs("HDF5 not configured with parallel support or parallel filtered writes are disabled!\n", stdout);
+    return 0;
+}
+
+#endif
diff --git a/examples/ph5_filtered_writes_no_sel.c b/examples/ph5_filtered_writes_no_sel.c
new file mode 100644
index 0000000..14c68c8
--- /dev/null
+++ b/examples/ph5_filtered_writes_no_sel.c
@@ -0,0 +1,366 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Copyright by The HDF Group.                                               *
+ * All rights reserved.                                                      *
+ *                                                                           *
+ * This file is part of HDF5.  The full HDF5 copyright notice, including     *
+ * terms governing use, modification, and redistribution, is contained in    *
+ * the COPYING file, which can be found at the root of the source code       *
+ * distribution tree, or in https://www.hdfgroup.org/licenses.               *
+ * If you do not have access to either file, you may request a copy from     *
+ * help@hdfgroup.org.                                                        *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/*
+ * Example of using the parallel HDF5 library to collectively write to
+ * datasets with filters applied to them when one or more MPI ranks do not
+ * have data to contribute to the dataset.
+ *
+ * If the HDF5_NOCLEANUP environment variable is set, the file that
+ * this example creates will not be removed as the example finishes.
+ *
+ * A parallel file prefix may be needed because, in general, the current
+ * working directory in which compiling is done is not suitable
+ * for parallel I/O and there is no standard pathname for parallel file
+ * systems. In some cases, the parallel file name may even need some
+ * parallel file type prefix such as: "pfs:/GF/...".  Therefore, this
+ * example parses the HDF5_PARAPREFIX environment variable for a prefix,
+ * if one is needed.
+ */
+
+#include <stdlib.h>
+
+#include "hdf5.h"
+
+#if defined(H5_HAVE_PARALLEL) && defined(H5_HAVE_PARALLEL_FILTERED_WRITES)
+
+#define EXAMPLE_FILE      "ph5_filtered_writes_no_sel.h5"
+#define EXAMPLE_DSET_NAME "DSET"
+
+#define EXAMPLE_DSET_DIMS           2
+#define EXAMPLE_DSET_CHUNK_DIM_SIZE 10
+
+/* Dataset datatype */
+#define HDF5_DATATYPE H5T_NATIVE_INT
+typedef int C_DATATYPE;
+
+/* Global variables */
+int mpi_rank, mpi_size;
+
+/*
+ * Routine to set an HDF5 filter on the given DCPL: 'deflate' if available, otherwise Fletcher32
+ */
+static void
+set_filter(hid_t dcpl_id)
+{
+    htri_t filter_avail;
+
+    /*
+     * Check if 'deflate' filter is available; if the check itself fails, leave the DCPL unchanged
+     */
+    filter_avail = H5Zfilter_avail(H5Z_FILTER_DEFLATE);
+    if (filter_avail < 0)
+        return;
+    else if (filter_avail) {
+        /*
+         * Set 'deflate' filter with reasonable
+         * compression level on DCPL
+         */
+        H5Pset_deflate(dcpl_id, 6);
+    }
+    else {
+        /*
+         * Set Fletcher32 checksum filter on DCPL
+         * since it is always available in HDF5
+         */
+        H5Pset_fletcher32(dcpl_id);
+    }
+}
+
+/*
+ * Routine to fill a data buffer with this process's MPI rank value. Assumes dimension rank is 2
+ * and data is stored contiguous; writes count[0] * count[1] elements. 'start' and 'stride' are unused.
+ */
+void
+fill_databuf(hsize_t start[], hsize_t count[], hsize_t stride[], C_DATATYPE *data)
+{
+    C_DATATYPE *dataptr = data;
+    hsize_t     i, j;
+
+    /* Store the global mpi_rank value in every element */
+    for (i = 0; i < count[0]; i++) {
+        for (j = 0; j < count[1]; j++) {
+            *dataptr++ = mpi_rank;
+        }
+    }
+}
+
+/* Delete the created file unless the environment variable named by HDF5_NOCLEANUP is set */
+static void
+cleanup(char *filename)
+{
+    hbool_t do_cleanup = getenv(HDF5_NOCLEANUP) ? 0 : 1;
+
+    if (do_cleanup)
+        MPI_File_delete(filename, MPI_INFO_NULL);
+}
+
+/*
+ * Routine to write to a dataset in a fashion
+ * where no chunks in the dataset are written
+ * to by more than 1 MPI rank and where some
+ * MPI ranks (those with an odd rank value)
+ * have no data to contribute, but must still
+ * participate in the collective write.
+ */
+static void
+write_dataset_some_no_sel(hid_t file_id, hid_t dxpl_id)
+{
+    C_DATATYPE data[EXAMPLE_DSET_CHUNK_DIM_SIZE][4 * EXAMPLE_DSET_CHUNK_DIM_SIZE];
+    hsize_t    dataset_dims[EXAMPLE_DSET_DIMS];
+    hsize_t    chunk_dims[EXAMPLE_DSET_DIMS];
+    hsize_t    start[EXAMPLE_DSET_DIMS];
+    hsize_t    stride[EXAMPLE_DSET_DIMS];
+    hsize_t    count[EXAMPLE_DSET_DIMS];
+    hbool_t    no_selection;
+    size_t     i, j;
+    hid_t      dset_id        = H5I_INVALID_HID;
+    hid_t      dcpl_id        = H5I_INVALID_HID;
+    hid_t      file_dataspace = H5I_INVALID_HID;
+
+    /*
+     * ------------------------------------
+     * Setup Dataset Creation Property List
+     * ------------------------------------
+     */
+
+    dcpl_id = H5Pcreate(H5P_DATASET_CREATE);
+
+    /*
+     * REQUIRED: Dataset chunking must be enabled to
+     *           apply a data filter to the dataset.
+     *           Chunks in the dataset are of size
+     *           EXAMPLE_DSET_CHUNK_DIM_SIZE x EXAMPLE_DSET_CHUNK_DIM_SIZE.
+     */
+    chunk_dims[0] = EXAMPLE_DSET_CHUNK_DIM_SIZE;
+    chunk_dims[1] = EXAMPLE_DSET_CHUNK_DIM_SIZE;
+    H5Pset_chunk(dcpl_id, EXAMPLE_DSET_DIMS, chunk_dims);
+
+    /* Set filter to be applied to created datasets */
+    set_filter(dcpl_id);
+
+    /*
+     * ------------------------------------
+     * Define the dimensions of the dataset
+     * and create it
+     * ------------------------------------
+     */
+
+    /*
+     * Create a dataset composed of 4 chunks
+     * per MPI rank. The first dataset dimension
+     * scales according to the number of MPI ranks.
+     * The second dataset dimension stays fixed
+     * according to the chunk size.
+     */
+    dataset_dims[0] = EXAMPLE_DSET_CHUNK_DIM_SIZE * mpi_size;
+    dataset_dims[1] = 4 * EXAMPLE_DSET_CHUNK_DIM_SIZE;
+
+    file_dataspace = H5Screate_simple(EXAMPLE_DSET_DIMS, dataset_dims, NULL);
+
+    /* Create the dataset */
+    dset_id = H5Dcreate2(file_id, EXAMPLE_DSET_NAME, HDF5_DATATYPE, file_dataspace, H5P_DEFAULT, dcpl_id,
+                         H5P_DEFAULT);
+
+    /*
+     * ------------------------------------
+     * Setup selection in the dataset for
+     * each MPI rank
+     * ------------------------------------
+     */
+
+    /*
+     * Odd rank value MPI ranks do not
+     * contribute any data to the dataset.
+     */
+    no_selection = (mpi_rank % 2) == 1;
+
+    if (no_selection) {
+        /*
+         * MPI ranks not contributing data to
+         * the dataset should call H5Sselect_none
+         * on the file dataspace that will be
+         * passed to H5Dwrite.
+         */
+        H5Sselect_none(file_dataspace);
+    }
+    else {
+        /*
+         * Even MPI ranks contribute data to
+         * the dataset. Each MPI rank's selection
+         * covers a single chunk in the first dataset
+         * dimension. Each MPI rank's selection
+         * covers 4 chunks in the second dataset
+         * dimension. This leads to each contributing
+         * MPI rank writing to 4 chunks of the dataset.
+         */
+        start[0]  = mpi_rank * EXAMPLE_DSET_CHUNK_DIM_SIZE;
+        start[1]  = 0;
+        stride[0] = 1;
+        stride[1] = 1;
+        count[0]  = EXAMPLE_DSET_CHUNK_DIM_SIZE;
+        count[1]  = 4 * EXAMPLE_DSET_CHUNK_DIM_SIZE;
+
+        H5Sselect_hyperslab(file_dataspace, H5S_SELECT_SET, start, stride, count, NULL);
+
+        /*
+         * --------------------------------------
+         * Fill data buffer with MPI rank's rank
+         * value to make it easy to see which
+         * part of the dataset each rank wrote to
+         * --------------------------------------
+         */
+
+        fill_databuf(start, count, stride, &data[0][0]);
+    }
+
+    /*
+     * ---------------------------------
+     * Write to the dataset collectively
+     * ---------------------------------
+     */
+
+    H5Dwrite(dset_id, HDF5_DATATYPE, no_selection ? H5S_ALL : H5S_BLOCK, file_dataspace, dxpl_id, data);
+
+    /*
+     * --------------
+     * Close HDF5 IDs
+     * --------------
+     */
+
+    H5Sclose(file_dataspace);
+    H5Pclose(dcpl_id);
+    H5Dclose(dset_id);
+}
+
+int
+main(int argc, char **argv)
+{
+    MPI_Comm comm       = MPI_COMM_WORLD;
+    MPI_Info info       = MPI_INFO_NULL;
+    hid_t    file_id    = H5I_INVALID_HID;
+    hid_t    fapl_id    = H5I_INVALID_HID;
+    hid_t    dxpl_id    = H5I_INVALID_HID;
+    char *   par_prefix = NULL;
+    char     filename[PATH_MAX];
+
+    MPI_Init(&argc, &argv);
+    MPI_Comm_size(comm, &mpi_size);
+    MPI_Comm_rank(comm, &mpi_rank);
+
+    /*
+     * ----------------------------------
+     * Start parallel access to HDF5 file
+     * ----------------------------------
+     */
+
+    /* Setup File Access Property List with parallel I/O access */
+    fapl_id = H5Pcreate(H5P_FILE_ACCESS);
+    H5Pset_fapl_mpio(fapl_id, comm, info);
+
+    /*
+     * OPTIONAL: Set collective metadata reads on FAPL to allow
+     *           parallel writes to filtered datasets to perform
+     *           better at scale. While not strictly necessary,
+     *           this is generally recommended.
+     */
+    H5Pset_all_coll_metadata_ops(fapl_id, true);
+
+    /*
+     * OPTIONAL: Set the latest file format version for HDF5 in
+     *           order to gain access to different dataset chunk
+     *           index types and better data encoding methods.
+     *           While not strictly necessary, this is generally
+     *           recommended.
+     */
+    H5Pset_libver_bounds(fapl_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST);
+
+    /* Parse any parallel prefix and create filename */
+    par_prefix = getenv("HDF5_PARAPREFIX");
+
+    snprintf(filename, PATH_MAX, "%s%s%s", par_prefix ? par_prefix : "", par_prefix ? "/" : "", EXAMPLE_FILE);
+
+    /* Create HDF5 file */
+    file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);
+
+    /*
+     * --------------------------------------
+     * Setup Dataset Transfer Property List
+     * with collective I/O
+     * --------------------------------------
+     */
+
+    dxpl_id = H5Pcreate(H5P_DATASET_XFER);
+
+    /*
+     * REQUIRED: Setup collective I/O for the dataset
+     *           write operations. Parallel writes to
+     *           filtered datasets MUST be collective,
+     *           even if some ranks have no data to
+     *           contribute to the write operation.
+     */
+    H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE);
+
+    /*
+     * --------------------------------
+     * Create and write to the dataset
+     * --------------------------------
+     */
+
+    /*
+     * Write to a dataset in a fashion where no
+     * chunks in the dataset are written to by
+     * more than 1 MPI rank and some MPI ranks
+     * have nothing to contribute to the dataset.
+     * In this case, the MPI ranks that have no
+     * data to contribute must still participate
+     * in the collective H5Dwrite call, but should
+     * call H5Sselect_none on the file dataspace
+     * passed to the H5Dwrite call.
+     */
+    write_dataset_some_no_sel(file_id, dxpl_id);
+
+    /*
+     * ------------------
+     * Close all HDF5 IDs
+     * ------------------
+     */
+
+    H5Pclose(dxpl_id);
+    H5Pclose(fapl_id);
+    H5Fclose(file_id);
+
+    printf("PHDF5 example finished with no errors\n");
+
+    /*
+     * ------------------------------------
+     * Cleanup created HDF5 file and finish
+     * ------------------------------------
+     */
+
+    cleanup(filename);
+
+    MPI_Finalize();
+
+    return 0;
+}
+
+#else
+
+int
+main(void)
+{
+    printf("HDF5 not configured with parallel support or parallel filtered writes are disabled!\n");
+    return 0;
+}
+
+#endif
diff --git a/examples/ph5example.c b/examples/ph5example.c
index 23af477..36fbfd5 100644
--- a/examples/ph5example.c
+++ b/examples/ph5example.c
@@ -1073,11 +1073,11 @@ main(int argc, char **argv)
 finish:
     if (mpi_rank == 0) { /* only process 0 reports */
         if (nerrors)
-            printf("***PHDF5 tests detected %d errors***\n", nerrors);
+            printf("***PHDF5 example detected %d errors***\n", nerrors);
         else {
-            printf("===================================\n");
-            printf("PHDF5 tests finished with no errors\n");
-            printf("===================================\n");
+            printf("=====================================\n");
+            printf("PHDF5 example finished with no errors\n");
+            printf("=====================================\n");
         }
     }
     if (docleanup)
diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt
index 10663f2..a165433 100644
--- a/release_docs/RELEASE.txt
+++ b/release_docs/RELEASE.txt
@@ -883,8 +883,46 @@ New Features
 
     Parallel Library:
     -----------------
-    -
+    - Several improvements to the parallel compression feature, including:
+
+      * Improved support for collective I/O (for both writes and reads)
+
+      * Significant reduction of memory usage for the feature as a whole
+
+      * Reduction of copying of application data buffers passed to H5Dwrite
+
+      * Addition of support for incremental file space allocation for filtered
+        datasets created in parallel. Incremental file space allocation is the
+        default for these types of datasets (early file space allocation is
+        also still supported), while early file space allocation is still the
+        default (and only supported allocation time) for unfiltered datasets
+        created in parallel. Incremental file space allocation should help with
+        parallel HDF5 applications that wish to use fill values on filtered
+        datasets, but would typically avoid doing so since dataset creation in
+        parallel would often take an excessive amount of time. Since these
+        datasets previously used early file space allocation, HDF5 would
+        allocate space for and write fill values to every chunk in the dataset
+        at creation time, leading to noticeable overhead. Instead, with
+        incremental file space allocation, allocation of file space for chunks
+        and writing of fill values to those chunks will be delayed until each
+        individual chunk is initially written to.
+
+      * Addition of support for HDF5's "don't filter partial edge chunks" flag
+        (https://portal.hdfgroup.org/display/HDF5/H5P_SET_CHUNK_OPTS)
+
+      * Addition of proper support for HDF5 fill values with the feature
+
+      * Addition of 'H5_HAVE_PARALLEL_FILTERED_WRITES' macro to H5pubconf.h
+        so HDF5 applications can determine at compile-time whether the feature
+        is available
+
+      * Addition of simple examples (ph5_filtered_writes.c and
+        ph5_filtered_writes_no_sel.c) under examples directory to demonstrate
+        usage of the feature
+
+      * Improved coverage of regression testing for the feature
 
+      (JTH - 2022/2/23)
 
     Fortran Library:
     ----------------
diff --git a/src/H5Dchunk.c b/src/H5Dchunk.c
index 1b0e579..e4d8706 100644
--- a/src/H5Dchunk.c
+++ b/src/H5Dchunk.c
@@ -239,10 +239,14 @@ typedef struct H5D_chunk_file_iter_ud_t {
 
 #ifdef H5_HAVE_PARALLEL
 /* information to construct a collective I/O operation for filling chunks */
-typedef struct H5D_chunk_coll_info_t {
-    size_t   num_io; /* Number of write operations */
-    haddr_t *addr;   /* array of the file addresses of the write operation */
-} H5D_chunk_coll_info_t;
+typedef struct H5D_chunk_coll_fill_info_t {
+    size_t num_chunks; /* Number of chunks in the write operation */
+    struct chunk_coll_fill_info {
+        haddr_t addr;       /* File address of the chunk */
+        size_t  chunk_size; /* Size of the chunk in the file */
+        hbool_t unfiltered_partial_chunk;
+    } * chunk_info;
+} H5D_chunk_coll_fill_info_t;
 #endif /* H5_HAVE_PARALLEL */
 
 typedef struct H5D_chunk_iter_ud_t {
@@ -287,9 +291,6 @@ static int H5D__chunk_format_convert_cb(const H5D_chunk_rec_t *chunk_rec, void *
 /* Helper routines */
 static herr_t   H5D__chunk_set_info_real(H5O_layout_chunk_t *layout, unsigned ndims, const hsize_t *curr_dims,
                                          const hsize_t *max_dims);
-static void *   H5D__chunk_mem_alloc(size_t size, const H5O_pline_t *pline);
-static void *   H5D__chunk_mem_xfree(void *chk, const void *pline);
-static void *   H5D__chunk_mem_realloc(void *chk, size_t size, const H5O_pline_t *pline);
 static herr_t   H5D__chunk_cinfo_cache_reset(H5D_chunk_cached_t *last);
 static herr_t   H5D__chunk_cinfo_cache_update(H5D_chunk_cached_t *last, const H5D_chunk_ud_t *udata);
 static hbool_t  H5D__chunk_cinfo_cache_found(const H5D_chunk_cached_t *last, H5D_chunk_ud_t *udata);
@@ -306,8 +307,6 @@ static herr_t   H5D__chunk_mem_cb(void *elem, const H5T_t *type, unsigned ndims,
 static unsigned H5D__chunk_hash_val(const H5D_shared_t *shared, const hsize_t *scaled);
 static herr_t   H5D__chunk_flush_entry(const H5D_t *dset, H5D_rdcc_ent_t *ent, hbool_t reset);
 static herr_t   H5D__chunk_cache_evict(const H5D_t *dset, H5D_rdcc_ent_t *ent, hbool_t flush);
-static hbool_t  H5D__chunk_is_partial_edge_chunk(unsigned dset_ndims, const uint32_t *chunk_dims,
-                                                 const hsize_t *chunk_scaled, const hsize_t *dset_dims);
 static void *   H5D__chunk_lock(const H5D_io_info_t *io_info, H5D_chunk_ud_t *udata, hbool_t relax,
                                 hbool_t prev_unfilt_chunk);
 static herr_t   H5D__chunk_unlock(const H5D_io_info_t *io_info, const H5D_chunk_ud_t *udata, hbool_t dirty,
@@ -315,9 +314,9 @@ static herr_t   H5D__chunk_unlock(const H5D_io_info_t *io_info, const H5D_chunk_
 static herr_t   H5D__chunk_cache_prune(const H5D_t *dset, size_t size);
 static herr_t   H5D__chunk_prune_fill(H5D_chunk_it_ud1_t *udata, hbool_t new_unfilt_chunk);
 #ifdef H5_HAVE_PARALLEL
-static herr_t H5D__chunk_collective_fill(const H5D_t *dset, H5D_chunk_coll_info_t *chunk_info,
-                                         size_t chunk_size, const void *fill_buf);
-static int    H5D__chunk_cmp_addr(const void *addr1, const void *addr2);
+static herr_t H5D__chunk_collective_fill(const H5D_t *dset, H5D_chunk_coll_fill_info_t *chunk_fill_info,
+                                         const void *fill_buf, const void *partial_chunk_fill_buf);
+static int    H5D__chunk_cmp_coll_fill_info(const void *_entry1, const void *_entry2);
 #endif /* H5_HAVE_PARALLEL */
 
 /* Debugging helper routine callback */
@@ -1362,7 +1361,7 @@ done:
  *
  *-------------------------------------------------------------------------
  */
-static void *
+void *
 H5D__chunk_mem_alloc(size_t size, const H5O_pline_t *pline)
 {
     void *ret_value = NULL; /* Return value */
@@ -1393,7 +1392,7 @@ H5D__chunk_mem_alloc(size_t size, const H5O_pline_t *pline)
  *
  *-------------------------------------------------------------------------
  */
-static void *
+void *
 H5D__chunk_mem_xfree(void *chk, const void *_pline)
 {
     const H5O_pline_t *pline = (const H5O_pline_t *)_pline;
@@ -1417,7 +1416,7 @@ H5D__chunk_mem_xfree(void *chk, const void *_pline)
  *             calls H5D__chunk_mem_xfree and discards the return value.
  *-------------------------------------------------------------------------
  */
-static void
+void
 H5D__chunk_mem_free(void *chk, const void *_pline)
 {
     (void)H5D__chunk_mem_xfree(chk, _pline);
@@ -1437,7 +1436,7 @@ H5D__chunk_mem_free(void *chk, const void *_pline)
  *
  *-------------------------------------------------------------------------
  */
-static void *
+void *
 H5D__chunk_mem_realloc(void *chk, size_t size, const H5O_pline_t *pline)
 {
     void *ret_value = NULL; /* Return value */
@@ -4320,8 +4319,8 @@ H5D__chunk_allocate(const H5D_io_info_t *io_info, hbool_t full_overwrite, const
     hbool_t blocks_written = FALSE; /* Flag to indicate that chunk was actually written */
     hbool_t using_mpi =
         FALSE; /* Flag to indicate that the file is being accessed with an MPI-capable file driver */
-    H5D_chunk_coll_info_t chunk_info; /* chunk address information for doing I/O */
-#endif                                /* H5_HAVE_PARALLEL */
+    H5D_chunk_coll_fill_info_t chunk_fill_info; /* chunk address information for doing I/O */
+#endif                                          /* H5_HAVE_PARALLEL */
     hbool_t             carry; /* Flag to indicate that chunk increment carrys to higher dimension (sorta) */
     unsigned            space_ndims;                     /* Dataset's space rank */
     const hsize_t *     space_dim;                       /* Dataset's dataspace dimensions */
@@ -4368,8 +4367,8 @@ H5D__chunk_allocate(const H5D_io_info_t *io_info, hbool_t full_overwrite, const
         using_mpi = TRUE;
 
         /* init chunk info stuff for collective I/O */
-        chunk_info.num_io = 0;
-        chunk_info.addr   = NULL;
+        chunk_fill_info.num_chunks = 0;
+        chunk_fill_info.chunk_info = NULL;
     }  /* end if */
 #endif /* H5_HAVE_PARALLEL */
 
@@ -4641,19 +4640,26 @@ H5D__chunk_allocate(const H5D_io_info_t *io_info, hbool_t full_overwrite, const
                 if (using_mpi) {
                     /* collect all chunk addresses to be written to
                        write collectively at the end */
-                    /* allocate/resize address array if no more space left */
-                    /* Note that if we add support for parallel filters we must
-                     * also store an array of chunk sizes and pass it to the
-                     * apporpriate collective write function */
-                    if (0 == chunk_info.num_io % 1024)
-                        if (NULL == (chunk_info.addr = (haddr_t *)H5MM_realloc(
-                                         chunk_info.addr, (chunk_info.num_io + 1024) * sizeof(haddr_t))))
+
+                    /* allocate/resize chunk info array if no more space left */
+                    if (0 == chunk_fill_info.num_chunks % 1024) {
+                        void *tmp_realloc;
+
+                        if (NULL == (tmp_realloc = H5MM_realloc(chunk_fill_info.chunk_info,
+                                                                (chunk_fill_info.num_chunks + 1024) *
+                                                                    sizeof(struct chunk_coll_fill_info))))
                             HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL,
-                                        "memory allocation failed for chunk addresses")
+                                        "memory allocation failed for chunk fill info")
+
+                        chunk_fill_info.chunk_info = tmp_realloc;
+                    }
 
-                    /* Store the chunk's address for later */
-                    chunk_info.addr[chunk_info.num_io] = udata.chunk_block.offset;
-                    chunk_info.num_io++;
+                    /* Store info about the chunk for later */
+                    chunk_fill_info.chunk_info[chunk_fill_info.num_chunks].addr = udata.chunk_block.offset;
+                    chunk_fill_info.chunk_info[chunk_fill_info.num_chunks].chunk_size = chunk_size;
+                    chunk_fill_info.chunk_info[chunk_fill_info.num_chunks].unfiltered_partial_chunk =
+                        (*fill_buf == unfilt_fill_buf);
+                    chunk_fill_info.num_chunks++;
 
                     /* Indicate that blocks will be written */
                     blocks_written = TRUE;
@@ -4726,7 +4732,7 @@ H5D__chunk_allocate(const H5D_io_info_t *io_info, hbool_t full_overwrite, const
 #ifdef H5_HAVE_PARALLEL
     /* do final collective I/O */
     if (using_mpi && blocks_written)
-        if (H5D__chunk_collective_fill(dset, &chunk_info, chunk_size, fb_info.fill_buf) < 0)
+        if (H5D__chunk_collective_fill(dset, &chunk_fill_info, fb_info.fill_buf, unfilt_fill_buf) < 0)
             HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "unable to write raw data to file")
 #endif /* H5_HAVE_PARALLEL */
 
@@ -4742,8 +4748,8 @@ done:
     unfilt_fill_buf = H5D__chunk_mem_xfree(unfilt_fill_buf, &def_pline);
 
 #ifdef H5_HAVE_PARALLEL
-    if (using_mpi && chunk_info.addr)
-        H5MM_free(chunk_info.addr);
+    if (using_mpi && chunk_fill_info.chunk_info)
+        H5MM_free(chunk_fill_info.chunk_info);
 #endif
 
     FUNC_LEAVE_NOAPI(ret_value)
@@ -4937,27 +4943,35 @@ done:
  *-------------------------------------------------------------------------
  */
 static herr_t
-H5D__chunk_collective_fill(const H5D_t *dset, H5D_chunk_coll_info_t *chunk_info, size_t chunk_size,
-                           const void *fill_buf)
+H5D__chunk_collective_fill(const H5D_t *dset, H5D_chunk_coll_fill_info_t *chunk_fill_info,
+                           const void *fill_buf, const void *partial_chunk_fill_buf)
 {
-    MPI_Comm         mpi_comm = MPI_COMM_NULL;    /* MPI communicator for file */
-    int              mpi_rank = (-1);             /* This process's rank  */
-    int              mpi_size = (-1);             /* MPI Comm size  */
-    int              mpi_code;                    /* MPI return code */
-    size_t           num_blocks;                  /* Number of blocks between processes. */
-    size_t           leftover_blocks;             /* Number of leftover blocks to handle */
-    int              blocks, leftover, block_len; /* converted to int for MPI */
+    MPI_Comm         mpi_comm = MPI_COMM_NULL; /* MPI communicator for file */
+    int              mpi_rank = (-1);          /* This process's rank  */
+    int              mpi_size = (-1);          /* MPI Comm size  */
+    int              mpi_code;                 /* MPI return code */
+    size_t           num_blocks;               /* Number of blocks between processes. */
+    size_t           leftover_blocks;          /* Number of leftover blocks to handle */
+    int              blocks, leftover;         /* converted to int for MPI */
     MPI_Aint *       chunk_disp_array = NULL;
+    MPI_Aint *       block_disps      = NULL;
     int *            block_lens       = NULL;
     MPI_Datatype     mem_type = MPI_BYTE, file_type = MPI_BYTE;
     H5FD_mpio_xfer_t prev_xfer_mode;         /* Previous data xfer mode */
     hbool_t          have_xfer_mode = FALSE; /* Whether the previous xffer mode has been retrieved */
-    hbool_t          need_addr_sort = FALSE;
-    int              i;                   /* Local index variable */
+    hbool_t          need_sort      = FALSE;
+    size_t           i;                   /* Local index variable */
     herr_t           ret_value = SUCCEED; /* Return value */
 
     FUNC_ENTER_STATIC
 
+    /*
+     * If a separate fill buffer is provided for partial chunks, ensure
+     * that the "don't filter partial edge chunks" flag is set.
+     */
+    if (partial_chunk_fill_buf)
+        HDassert(dset->shared->layout.u.chunk.flags & H5O_LAYOUT_CHUNK_DONT_FILTER_PARTIAL_BOUND_CHUNKS);
+
     /* Get the MPI communicator */
     if (MPI_COMM_NULL == (mpi_comm = H5F_mpi_get_comm(dset->oloc.file)))
         HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "Can't retrieve MPI communicator")
@@ -4973,39 +4987,89 @@ H5D__chunk_collective_fill(const H5D_t *dset, H5D_chunk_coll_info_t *chunk_info,
     /* Distribute evenly the number of blocks between processes. */
     if (mpi_size == 0)
         HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "Resulted in division by zero")
-    num_blocks = (size_t)(chunk_info->num_io / (size_t)mpi_size); /* value should be the same on all procs */
+    num_blocks =
+        (size_t)(chunk_fill_info->num_chunks / (size_t)mpi_size); /* value should be the same on all procs */
 
     /* After evenly distributing the blocks between processes, are there any
      * leftover blocks for each individual process (round-robin)?
      */
-    leftover_blocks = (size_t)(chunk_info->num_io % (size_t)mpi_size);
+    leftover_blocks = (size_t)(chunk_fill_info->num_chunks % (size_t)mpi_size);
 
     /* Cast values to types needed by MPI */
     H5_CHECKED_ASSIGN(blocks, int, num_blocks, size_t);
     H5_CHECKED_ASSIGN(leftover, int, leftover_blocks, size_t);
-    H5_CHECKED_ASSIGN(block_len, int, chunk_size, size_t);
 
     /* Check if we have any chunks to write on this rank */
     if (num_blocks > 0 || (leftover && leftover > mpi_rank)) {
+        MPI_Aint partial_fill_buf_disp = 0;
+        hbool_t  all_same_block_len    = TRUE;
+
         /* Allocate buffers */
-        /* (MSC - should not need block_lens if MPI_type_create_hindexed_block is working) */
-        if (NULL == (block_lens = (int *)H5MM_malloc((size_t)(blocks + 1) * sizeof(int))))
-            HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk lengths buffer")
         if (NULL == (chunk_disp_array = (MPI_Aint *)H5MM_malloc((size_t)(blocks + 1) * sizeof(MPI_Aint))))
             HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk file displacement buffer")
 
-        for (i = 0; i < blocks; i++) {
-            /* store the chunk address as an MPI_Aint */
-            chunk_disp_array[i] = (MPI_Aint)(chunk_info->addr[i + (mpi_rank * blocks)]);
+        if (partial_chunk_fill_buf) {
+            MPI_Aint fill_buf_addr;
+            MPI_Aint partial_fill_buf_addr;
+
+            /* Calculate the displacement between the fill buffer and partial chunk fill buffer */
+            if (MPI_SUCCESS != (mpi_code = MPI_Get_address(fill_buf, &fill_buf_addr)))
+                HMPI_GOTO_ERROR(FAIL, "MPI_Get_address failed", mpi_code)
+            if (MPI_SUCCESS != (mpi_code = MPI_Get_address(partial_chunk_fill_buf, &partial_fill_buf_addr)))
+                HMPI_GOTO_ERROR(FAIL, "MPI_Get_address failed", mpi_code)
 
-            /* MSC - should not need this if MPI_type_create_hindexed_block is working */
-            block_lens[i] = block_len;
+#if (MPI_VERSION > 3) || (MPI_VERSION == 3 && MPI_SUBVERSION >= 1) /* MPI_Aint_diff needs MPI >= 3.1 */
+            partial_fill_buf_disp = MPI_Aint_diff(partial_fill_buf_addr, fill_buf_addr);
+#else
+            partial_fill_buf_disp = partial_fill_buf_addr - fill_buf_addr;
+#endif
 
-            /* Make sure that the addresses in the datatype are
-             * monotonically non-decreasing
+            /*
+             * Allocate all-zero block displacements array. If a block's displacement
+             * is left as zero, that block will be written to from the regular fill
+             * buffer. If a block represents an unfiltered partial edge chunk, its
+             * displacement will be set so that the block is written to from the
+             * unfiltered fill buffer.
              */
-            if (i && (chunk_disp_array[i] < chunk_disp_array[i - 1]))
-                need_addr_sort = TRUE;
+            if (NULL == (block_disps = (MPI_Aint *)H5MM_calloc((size_t)(blocks + 1) * sizeof(MPI_Aint))))
+                HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate block displacements buffer")
+        }
+
+        /*
+         * Perform initial scan of chunk info list to:
+         *  - make sure that chunk addresses are monotonically non-decreasing
+         *  - check if all blocks have the same length
+         */
+        for (i = 1; i < chunk_fill_info->num_chunks; i++) {
+            if (chunk_fill_info->chunk_info[i].addr < chunk_fill_info->chunk_info[i - 1].addr)
+                need_sort = TRUE;
+
+            if (chunk_fill_info->chunk_info[i].chunk_size != chunk_fill_info->chunk_info[i - 1].chunk_size)
+                all_same_block_len = FALSE;
+        }
+
+        if (need_sort)
+            HDqsort(chunk_fill_info->chunk_info, chunk_fill_info->num_chunks,
+                    sizeof(struct chunk_coll_fill_info), H5D__chunk_cmp_coll_fill_info);
+
+        /* Allocate buffer for block lengths if necessary */
+        if (!all_same_block_len)
+            if (NULL == (block_lens = (int *)H5MM_malloc((size_t)(blocks + 1) * sizeof(int))))
+                HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk lengths buffer")
+
+        for (i = 0; i < (size_t)blocks; i++) {
+            size_t idx = i + (size_t)(mpi_rank * blocks);
+
+            /* store the chunk address as an MPI_Aint */
+            chunk_disp_array[i] = (MPI_Aint)(chunk_fill_info->chunk_info[idx].addr);
+
+            if (!all_same_block_len)
+                H5_CHECKED_ASSIGN(block_lens[i], int, chunk_fill_info->chunk_info[idx].chunk_size, size_t);
+
+            if (chunk_fill_info->chunk_info[idx].unfiltered_partial_chunk) {
+                HDassert(partial_chunk_fill_buf);
+                block_disps[i] = partial_fill_buf_disp;
+            }
         } /* end for */
 
         /* Calculate if there are any leftover blocks after evenly
@@ -5013,32 +5077,71 @@ H5D__chunk_collective_fill(const H5D_t *dset, H5D_chunk_coll_info_t *chunk_info,
          * to processes 0 -> leftover.
          */
         if (leftover && leftover > mpi_rank) {
-            chunk_disp_array[blocks] = (MPI_Aint)chunk_info->addr[(blocks * mpi_size) + mpi_rank];
-            if (blocks && (chunk_disp_array[blocks] < chunk_disp_array[blocks - 1]))
-                need_addr_sort = TRUE;
-            block_lens[blocks] = block_len;
+            chunk_disp_array[blocks] =
+                (MPI_Aint)chunk_fill_info->chunk_info[(blocks * mpi_size) + mpi_rank].addr;
+
+            if (!all_same_block_len)
+                H5_CHECKED_ASSIGN(block_lens[blocks], int,
+                                  chunk_fill_info->chunk_info[(blocks * mpi_size) + mpi_rank].chunk_size,
+                                  size_t);
+
+            if (chunk_fill_info->chunk_info[(blocks * mpi_size) + mpi_rank].unfiltered_partial_chunk) {
+                HDassert(partial_chunk_fill_buf);
+                block_disps[blocks] = partial_fill_buf_disp;
+            }
+
             blocks++;
         }
 
-        /* Ensure that the blocks are sorted in monotonically non-decreasing
-         * order of offset in the file.
-         */
-        if (need_addr_sort)
-            HDqsort(chunk_disp_array, (size_t)blocks, sizeof(MPI_Aint), H5D__chunk_cmp_addr);
+        /* Create file and memory types for the write operation */
+        if (all_same_block_len) {
+            int block_len;
+
+            H5_CHECKED_ASSIGN(block_len, int, chunk_fill_info->chunk_info[0].chunk_size, size_t);
+
+            mpi_code =
+                MPI_Type_create_hindexed_block(blocks, block_len, chunk_disp_array, MPI_BYTE, &file_type);
+            if (mpi_code != MPI_SUCCESS)
+                HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed_block failed", mpi_code)
+
+            if (partial_chunk_fill_buf) {
+                /*
+                 * If filters are disabled for partial edge chunks, those chunks could
+                 * potentially have the same block length as the other chunks, but still
+                 * need to be written to using the unfiltered fill buffer. Use an hindexed
+                 * block type rather than an hvector.
+                 */
+                mpi_code =
+                    MPI_Type_create_hindexed_block(blocks, block_len, block_disps, MPI_BYTE, &mem_type);
+                if (mpi_code != MPI_SUCCESS)
+                    HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed_block failed", mpi_code)
+            }
+            else {
+                mpi_code = MPI_Type_create_hvector(blocks, block_len, 0, MPI_BYTE, &mem_type);
+                if (mpi_code != MPI_SUCCESS)
+                    HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
+            }
+        }
+        else {
+            /*
+             * Currently, different block lengths imply that there are partial
+             * edge chunks and the "don't filter partial edge chunks" flag is set.
+             */
+            HDassert(partial_chunk_fill_buf);
+            HDassert(block_lens);
+            HDassert(block_disps);
+
+            mpi_code = MPI_Type_create_hindexed(blocks, block_lens, chunk_disp_array, MPI_BYTE, &file_type);
+            if (mpi_code != MPI_SUCCESS)
+                HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
+
+            mpi_code = MPI_Type_create_hindexed(blocks, block_lens, block_disps, MPI_BYTE, &mem_type);
+            if (mpi_code != MPI_SUCCESS)
+                HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
+        }
 
-        /* MSC - should use this if MPI_type_create_hindexed block is working:
-         * mpi_code = MPI_Type_create_hindexed_block(blocks, block_len, chunk_disp_array, MPI_BYTE,
-         * &file_type);
-         */
-        mpi_code = MPI_Type_create_hindexed(blocks, block_lens, chunk_disp_array, MPI_BYTE, &file_type);
-        if (mpi_code != MPI_SUCCESS)
-            HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
         if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(&file_type)))
             HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
-
-        mpi_code = MPI_Type_create_hvector(blocks, block_len, 0, MPI_BYTE, &mem_type);
-        if (mpi_code != MPI_SUCCESS)
-            HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hvector failed", mpi_code)
         if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(&mem_type)))
             HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
     } /* end if */
@@ -5081,39 +5184,25 @@ done:
         if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&mem_type)))
             HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
     H5MM_xfree(chunk_disp_array);
+    H5MM_xfree(block_disps);
     H5MM_xfree(block_lens);
 
     FUNC_LEAVE_NOAPI(ret_value)
 } /* end H5D__chunk_collective_fill() */
 
 static int
-H5D__chunk_cmp_addr(const void *addr1, const void *addr2)
+H5D__chunk_cmp_coll_fill_info(const void *_entry1, const void *_entry2)
 {
-    MPI_Aint _addr1 = (MPI_Aint)0, _addr2 = (MPI_Aint)0;
-    int      ret_value = 0;
+    const struct chunk_coll_fill_info *entry1; /* First entry handed in by the sort routine */
+    const struct chunk_coll_fill_info *entry2; /* Second entry handed in by the sort routine */
 
     FUNC_ENTER_STATIC_NOERR
 
-    _addr1 = *((const MPI_Aint *)addr1);
-    _addr2 = *((const MPI_Aint *)addr2);
+    entry1 = (const struct chunk_coll_fill_info *)_entry1;
+    entry2 = (const struct chunk_coll_fill_info *)_entry2;
 
-#if MPI_VERSION >= 3 && MPI_SUBVERSION >= 1
-    {
-        MPI_Aint diff = MPI_Aint_diff(_addr1, _addr2);
-
-        if (diff < (MPI_Aint)0)
-            ret_value = -1;
-        else if (diff > (MPI_Aint)0)
-            ret_value = 1;
-        else
-            ret_value = 0;
-    }
-#else
-    ret_value = (_addr1 > _addr2) - (_addr1 < _addr2);
-#endif
-
-    FUNC_LEAVE_NOAPI(ret_value)
-} /* end H5D__chunk_cmp_addr() */
+    FUNC_LEAVE_NOAPI(H5F_addr_cmp(entry1->addr, entry2->addr))
+} /* end H5D__chunk_cmp_coll_fill_info() */
 #endif /* H5_HAVE_PARALLEL */
 
 /*-------------------------------------------------------------------------
@@ -6827,7 +6916,7 @@ done:
  *
  *-------------------------------------------------------------------------
  */
-static hbool_t
+hbool_t
 H5D__chunk_is_partial_edge_chunk(unsigned dset_ndims, const uint32_t *chunk_dims, const hsize_t scaled[],
                                  const hsize_t *dset_dims)
 {
@@ -7122,6 +7211,89 @@ done:
 } /* end H5D__chunk_format_convert() */
 
 /*-------------------------------------------------------------------------
+ * Function:    H5D__chunk_index_empty_cb
+ *
+ * Purpose:     Chunk index iteration callback that sets the hbool_t
+ *              pointed to by `_udata` to FALSE and asks for iteration to
+ *              stop. If this callback is entered at all, the chunk index
+ *              contains at least one chunk, so it is not empty.
+ *
+ * Return:      H5_ITER_STOP (always)
+ *
+ *-------------------------------------------------------------------------
+ */
+static int
+H5D__chunk_index_empty_cb(const H5D_chunk_rec_t H5_ATTR_UNUSED *chunk_rec, void *_udata)
+{
+    hbool_t *empty     = (hbool_t *)_udata; /* Caller's "index is empty" flag */
+    int      ret_value = H5_ITER_STOP;      /* One chunk record is enough; stop iterating */
+
+    FUNC_ENTER_STATIC_NOERR
+
+    *empty = FALSE;
+
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5D__chunk_index_empty_cb() */
+
+/*-------------------------------------------------------------------------
+ * Function:    H5D__chunk_index_empty
+ *
+ * Purpose:     Determines whether a chunk index is empty (has no chunks
+ *              inserted into it yet). The answer is stored in `*empty`.
+ *
+ * Note:        This routine is meant to be a little more performant than
+ *              just counting the number of chunks in the index. In the
+ *              future, this should probably be a callback that the chunk
+ *              index ops structure provides.
+ *
+ * Return:      Non-negative on success/Negative on failure
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5D__chunk_index_empty(const H5D_t *dset, hbool_t *empty)
+{
+    H5D_chk_idx_info_t idx_info;            /* Chunked index info */
+    H5D_rdcc_ent_t *   ent;                 /* Cache entry  */
+    const H5D_rdcc_t * rdcc      = NULL;    /* Raw data chunk cache */
+    herr_t             ret_value = SUCCEED; /* Return value */
+
+    FUNC_ENTER_PACKAGE_TAG(dset->oloc.addr)
+
+    HDassert(dset);
+    HDassert(dset->shared);
+    HDassert(empty);
+
+    rdcc = &(dset->shared->cache.chunk); /* raw data chunk cache */
+    HDassert(rdcc);
+
+    /* Walk every entry currently held in the raw data chunk cache */
+    for (ent = rdcc->head; ent; ent = ent->next)
+        /* Flush each cache entry first; presumably so the index scan below sees its chunk -- confirm */
+        if (H5D__chunk_flush_entry(dset, ent, FALSE) < 0)
+            HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "cannot flush indexed storage buffer")
+
+    /* Compose chunked index info struct */
+    idx_info.f       = dset->oloc.file;
+    idx_info.pline   = &dset->shared->dcpl_cache.pline;
+    idx_info.layout  = &dset->shared->layout.u.chunk;
+    idx_info.storage = &dset->shared->layout.storage.u.chunk;
+
+    *empty = TRUE; /* Assume empty; the iteration callback clears this if any chunk is found */
+
+    if (H5F_addr_defined(idx_info.storage->idx_addr)) {
+        /* Index address is defined: iterate, letting the callback stop at the first chunk record */
+        if ((dset->shared->layout.storage.u.chunk.ops->iterate)(&idx_info, H5D__chunk_index_empty_cb, empty) <
+            0)
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL,
+                        "unable to retrieve allocated chunk information from index")
+    }
+
+done:
+    FUNC_LEAVE_NOAPI_TAG(ret_value)
+} /* end H5D__chunk_index_empty() */
+
+/*-------------------------------------------------------------------------
  * Function:    H5D__get_num_chunks_cb
  *
  * Purpose:     Callback function that increments the number of written
diff --git a/src/H5Dint.c b/src/H5Dint.c
index c9ea6bd..cc17265 100644
--- a/src/H5Dint.c
+++ b/src/H5Dint.c
@@ -378,40 +378,18 @@ H5D__get_space_status(const H5D_t *dset, H5D_space_status_t *allocation)
 
     /* Check for chunked layout */
     if (dset->shared->layout.type == H5D_CHUNKED) {
-        hsize_t  space_allocated; /* The number of bytes allocated for chunks */
-        hssize_t snelmts;         /* Temporary holder for number of elements in dataspace */
-        hsize_t  nelmts;          /* Number of elements in dataspace */
-        size_t   dt_size;         /* Size of datatype */
-        hsize_t  full_size;       /* The number of bytes in the dataset when fully populated */
-
-        /* For chunked layout set the space status by the storage size */
-        /* Get the dataset's dataspace */
-        HDassert(dset->shared->space);
-
-        /* Get the total number of elements in dataset's dataspace */
-        if ((snelmts = H5S_GET_EXTENT_NPOINTS(dset->shared->space)) < 0)
-            HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "unable to retrieve number of elements in dataspace")
-        nelmts = (hsize_t)snelmts;
-
-        /* Get the size of the dataset's datatype */
-        if (0 == (dt_size = H5T_GET_SIZE(dset->shared->type)))
-            HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "unable to retrieve size of datatype")
-
-        /* Compute the maximum size of the dataset in bytes */
-        full_size = nelmts * dt_size;
-
-        /* Check for overflow during multiplication */
-        if (nelmts != (full_size / dt_size))
-            HGOTO_ERROR(H5E_DATASET, H5E_OVERFLOW, FAIL, "size of dataset's storage overflowed")
-
-        /* Difficult to error check, since the error value is 0 and 0 is a valid value... :-/ */
-        if (H5D__get_storage_size(dset, &space_allocated) < 0)
-            HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get size of dataset's storage")
-
-        /* Decide on how much of the space is allocated */
-        if (space_allocated == 0)
+        hsize_t n_chunks_total = dset->shared->layout.u.chunk.nchunks;
+        hsize_t n_chunks_alloc = 0;
+
+        if (H5D__get_num_chunks(dset, dset->shared->space, &n_chunks_alloc) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL,
+                        "unable to retrieve number of allocated chunks in dataset")
+
+        HDassert(n_chunks_alloc <= n_chunks_total);
+
+        if (n_chunks_alloc == 0)
             *allocation = H5D_SPACE_STATUS_NOT_ALLOCATED;
-        else if (space_allocated == full_size)
+        else if (n_chunks_alloc == n_chunks_total)
             *allocation = H5D_SPACE_STATUS_ALLOCATED;
         else
             *allocation = H5D_SPACE_STATUS_PART_ALLOCATED;
@@ -1301,10 +1279,19 @@ H5D__create(H5F_t *file, hid_t type_id, const H5S_t *space, hid_t dcpl_id, hid_t
             HGOTO_ERROR(H5E_DATASET, H5E_CANTSET, NULL, "can't set latest indexing")
     } /* end if */
 
-    /* Check if this dataset is going into a parallel file and set space allocation time */
+    /* Check if the file driver would like to force early space allocation */
     if (H5F_HAS_FEATURE(file, H5FD_FEAT_ALLOCATE_EARLY))
         new_dset->shared->dcpl_cache.fill.alloc_time = H5D_ALLOC_TIME_EARLY;
 
+    /*
+     * Check if this dataset is going into a parallel file and set space allocation time.
+     * If the dataset has filters applied to it, writes to the dataset must be collective,
+     * so we don't need to force early space allocation. Otherwise, we force early space
+     * allocation to facilitate independent raw data operations.
+     */
+    if (H5F_HAS_FEATURE(file, H5FD_FEAT_HAS_MPI) && (new_dset->shared->dcpl_cache.pline.nused == 0))
+        new_dset->shared->dcpl_cache.fill.alloc_time = H5D_ALLOC_TIME_EARLY;
+
     /* Set the dataset's I/O operations */
     if (H5D__layout_set_io_ops(new_dset) < 0)
         HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, NULL, "unable to initialize I/O operations")
diff --git a/src/H5Dio.c b/src/H5Dio.c
index 1ea3f07..e226a0a 100644
--- a/src/H5Dio.c
+++ b/src/H5Dio.c
@@ -300,6 +300,7 @@ H5D__write(H5D_t *dataset, hid_t mem_type_id, H5S_t *mem_space, H5S_t *file_spac
     H5D_io_info_t    io_info;                     /* Dataset I/O info     */
     H5D_type_info_t  type_info;                   /* Datatype info for operation */
     hbool_t          type_info_init      = FALSE; /* Whether the datatype info has been initialized */
+    hbool_t          should_alloc_space  = FALSE; /* Whether or not to initialize dataset's storage */
     H5S_t *          projected_mem_space = NULL;  /* If not NULL, ptr to dataspace containing a     */
                                                   /* projection of the supplied mem_space to a new  */
                                                   /* dataspace with rank equal to that of           */
@@ -432,8 +433,20 @@ H5D__write(H5D_t *dataset, hid_t mem_type_id, H5S_t *mem_space, H5S_t *file_spac
         HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to set up I/O operation")
 
     /* Allocate dataspace and initialize it if it hasn't been. */
-    if (nelmts > 0 && dataset->shared->dcpl_cache.efl.nused == 0 &&
-        !(*dataset->shared->layout.ops->is_space_alloc)(&dataset->shared->layout.storage)) {
+    should_alloc_space = dataset->shared->dcpl_cache.efl.nused == 0 &&
+                         !(*dataset->shared->layout.ops->is_space_alloc)(&dataset->shared->layout.storage);
+
+    /*
+     * If not using an MPI-based VFD, we only need to allocate
+     * and initialize storage if there's a selection in the
+     * dataset's dataspace. Otherwise, we always need to participate
+     * in the storage allocation since this may use collective
+     * operations and we will hang if we don't participate.
+     */
+    if (!H5F_HAS_FEATURE(dataset->oloc.file, H5FD_FEAT_HAS_MPI))
+        should_alloc_space = should_alloc_space && (nelmts > 0);
+
+    if (should_alloc_space) {
         hssize_t file_nelmts;    /* Number of elements in file dataset's dataspace */
         hbool_t  full_overwrite; /* Whether we are over-writing all the elements */
 
@@ -808,98 +821,35 @@ H5D__ioinfo_adjust(H5D_io_info_t *io_info, const H5D_t *dset, const H5S_t *file_
             io_info->io_ops.single_write = H5D__mpio_select_write;
         } /* end if */
         else {
-            int comm_size = 0;
-
-            /* Retrieve size of MPI communicator used for file */
-            if ((comm_size = H5F_shared_mpi_get_size(io_info->f_sh)) < 0)
-                HGOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "can't get MPI communicator size")
-
             /* Check if there are any filters in the pipeline. If there are,
              * we cannot break to independent I/O if this is a write operation
              * with multiple ranks involved; otherwise, there will be metadata
              * inconsistencies in the file.
              */
-            if (comm_size > 1 && io_info->op_type == H5D_IO_OP_WRITE &&
-                io_info->dset->shared->dcpl_cache.pline.nused > 0) {
-                H5D_mpio_no_collective_cause_t cause;
-                uint32_t                       local_no_collective_cause;
-                uint32_t                       global_no_collective_cause;
-                hbool_t                        local_error_message_previously_written  = FALSE;
-                hbool_t                        global_error_message_previously_written = FALSE;
-                size_t                         idx;
-                size_t                         cause_strings_len;
-                char                           local_no_collective_cause_string[512]  = "";
-                char                           global_no_collective_cause_string[512] = "";
-                const char *                   cause_strings[]                        = {
-                    "independent I/O was requested",
-                    "datatype conversions were required",
-                    "data transforms needed to be applied",
-                    "optimized MPI types flag wasn't set",
-                    "one of the dataspaces was neither simple nor scalar",
-                    "dataset was not contiguous or chunked",
-                    "parallel writes to filtered datasets are disabled",
-                    "an error occurred while checking if collective I/O was possible"};
-
-                cause_strings_len = sizeof(cause_strings) / sizeof(cause_strings[0]);
-
-                if (H5CX_get_mpio_local_no_coll_cause(&local_no_collective_cause) < 0)
-                    HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL,
-                                "unable to get local no collective cause value")
-                if (H5CX_get_mpio_global_no_coll_cause(&global_no_collective_cause) < 0)
-                    HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL,
-                                "unable to get global no collective cause value")
-
-                /* Append each of the "reason for breaking collective I/O" error messages to the
-                 * local and global no collective cause strings */
-                for (cause = 1, idx = 0;
-                     (cause < H5D_MPIO_NO_COLLECTIVE_MAX_CAUSE) && (idx < cause_strings_len);
-                     cause <<= 1, idx++) {
-                    if (cause & local_no_collective_cause) {
-                        size_t local_buffer_space = sizeof(local_no_collective_cause_string) -
-                                                    HDstrlen(local_no_collective_cause_string) - 1;
-
-                        /* Check if there were any previous error messages included. If so, prepend a
-                         * semicolon to separate the messages.
-                         */
-                        if (local_buffer_space && local_error_message_previously_written) {
-                            HDstrncat(local_no_collective_cause_string, "; ", local_buffer_space);
-                            local_buffer_space -= MIN(local_buffer_space, 2);
-                        }
-
-                        if (local_buffer_space)
-                            HDstrncat(local_no_collective_cause_string, cause_strings[idx],
-                                      local_buffer_space);
-
-                        local_error_message_previously_written = TRUE;
-                    } /* end if */
-
-                    if (cause & global_no_collective_cause) {
-                        size_t global_buffer_space = sizeof(global_no_collective_cause_string) -
-                                                     HDstrlen(global_no_collective_cause_string) - 1;
-
-                        /* Check if there were any previous error messages included. If so, prepend a
-                         * semicolon to separate the messages.
-                         */
-                        if (global_buffer_space && global_error_message_previously_written) {
-                            HDstrncat(global_no_collective_cause_string, "; ", global_buffer_space);
-                            global_buffer_space -= MIN(global_buffer_space, 2);
-                        }
-
-                        if (global_buffer_space)
-                            HDstrncat(global_no_collective_cause_string, cause_strings[idx],
-                                      global_buffer_space);
-
-                        global_error_message_previously_written = TRUE;
-                    } /* end if */
-                }     /* end for */
-
-                HGOTO_ERROR(H5E_IO, H5E_NO_INDEPENDENT, FAIL,
-                            "Can't perform independent write with filters in pipeline.\n"
-                            "    The following caused a break from collective I/O:\n"
-                            "        Local causes: %s\n"
-                            "        Global causes: %s",
-                            local_no_collective_cause_string, global_no_collective_cause_string);
-            } /* end if */
+            if (io_info->op_type == H5D_IO_OP_WRITE && io_info->dset->shared->dcpl_cache.pline.nused > 0) {
+                int comm_size = 0;
+
+                /* Retrieve size of MPI communicator used for file */
+                if ((comm_size = H5F_shared_mpi_get_size(io_info->f_sh)) < 0)
+                    HGOTO_ERROR(H5E_FILE, H5E_CANTGET, FAIL, "can't get MPI communicator size")
+
+                if (comm_size > 1) {
+                    char local_no_coll_cause_string[512];
+                    char global_no_coll_cause_string[512];
+
+                    if (H5D__mpio_get_no_coll_cause_strings(local_no_coll_cause_string, 512,
+                                                            global_no_coll_cause_string, 512) < 0)
+                        HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL,
+                                    "can't get reasons for breaking collective I/O")
+
+                    HGOTO_ERROR(H5E_IO, H5E_NO_INDEPENDENT, FAIL,
+                                "Can't perform independent write with filters in pipeline.\n"
+                                "    The following caused a break from collective I/O:\n"
+                                "        Local causes: %s\n"
+                                "        Global causes: %s",
+                                local_no_coll_cause_string, global_no_coll_cause_string);
+                }
+            }
 
             /* If we won't be doing collective I/O, but the user asked for
              * collective I/O, change the request to use independent I/O
diff --git a/src/H5Dmpio.c b/src/H5Dmpio.c
index ce790c4..527fc7b 100644
--- a/src/H5Dmpio.c
+++ b/src/H5Dmpio.c
@@ -36,6 +36,7 @@
 #include "H5Eprivate.h"  /* Error handling    */
 #include "H5Fprivate.h"  /* File access       */
 #include "H5FDprivate.h" /* File drivers      */
+#include "H5FLprivate.h" /* Free Lists        */
 #include "H5Iprivate.h"  /* IDs               */
 #include "H5MMprivate.h" /* Memory management */
 #include "H5Oprivate.h"  /* Object headers    */
@@ -43,6 +44,15 @@
 #include "H5Sprivate.h"  /* Dataspaces        */
 #include "H5VMprivate.h" /* Vector            */
 
+/* uthash is an external, header-only hash table implementation.
+ *
+ * We include the file directly in src/ and #define a few functions
+ * to use our internal memory calls.
+ */
+#define uthash_malloc(sz)    H5MM_malloc(sz)
+#define uthash_free(ptr, sz) H5MM_free(ptr) /* Ignoring sz is intentional */
+#include "uthash.h"
+
 #ifdef H5_HAVE_PARALLEL
 
 /****************/
@@ -81,9 +91,54 @@
 /* Macros to represent the regularity of the selection for multiple chunk IO case. */
 #define H5D_CHUNK_SELECT_REG 1
 
+/*
+ * Threshold for choosing between redistributing shared filtered chunks on all MPI
+ * ranks or on just MPI rank 0: the number of redistribute-info entries in 25 * H5_MB bytes
+ */
+#define H5D_CHUNK_REDISTRIBUTE_THRES ((size_t)((25 * H5_MB) / sizeof(H5D_chunk_redistribute_info_t)))
+
+/*
+ * Initial allocation size for the arrays that hold
+ * buffers for chunk modification data that is sent
+ * to other ranks and the MPI_Request objects for
+ * those send operations
+ */
+#define H5D_CHUNK_NUM_SEND_MSGS_INIT 64
+
+/*
+ * Define a tag value for the MPI messages sent/received for
+ * chunk modification data
+ */
+#define H5D_CHUNK_MOD_DATA_TAG 64
+
+/*
+ * Macro to initialize the f, pline, layout and storage fields
+ * of a H5D_chk_idx_info_t structure (given as an lvalue, not a
+ * pointer) from a pointer to a H5D_io_info_t structure
+ */
+#define H5D_MPIO_INIT_CHUNK_IDX_INFO(index_info, io_info_ptr)                                                \
+    do {                                                                                                     \
+        (index_info).f       = (io_info_ptr)->dset->oloc.file;                                               \
+        (index_info).pline   = &((io_info_ptr)->dset->shared->dcpl_cache.pline);                             \
+        (index_info).layout  = &((io_info_ptr)->dset->shared->layout.u.chunk);                               \
+        (index_info).storage = &((io_info_ptr)->dset->shared->layout.storage.u.chunk);                       \
+    } while (0)
+
+/*
+ * Macro to zero a H5D_chunk_ud_t structure (given as an lvalue, not a pointer) and set
+ * its common layout/storage fields from a pointer to a H5D_chk_idx_info_t structure
+ */
+#define H5D_MPIO_INIT_CHUNK_UD_INFO(chunk_ud, index_info_ptr)                                                \
+    do {                                                                                                     \
+        HDmemset(&(chunk_ud), 0, sizeof(H5D_chunk_ud_t));                                                    \
+        (chunk_ud).common.layout  = (index_info_ptr)->layout;                                                \
+        (chunk_ud).common.storage = (index_info_ptr)->storage;                                               \
+    } while (0)
+
 /******************/
 /* Local Typedefs */
 /******************/
+
 /* Combine chunk address and chunk info into a struct for better performance. */
 typedef struct H5D_chunk_addr_info_t {
     haddr_t          chunk_addr;
@@ -100,115 +155,137 @@ typedef enum H5D_mpio_no_rank0_bcast_cause_t {
 } H5D_mpio_no_rank0_bcast_cause_t;
 
 /*
+ * Information necessary for re-allocating file space for a chunk
+ * during a parallel write of a chunked dataset with filters
+ * applied.
+ */
+typedef struct H5D_chunk_alloc_info_t {
+    H5F_block_t chunk_current; /* Presumably the chunk's file block before re-allocation; TODO confirm */
+    H5F_block_t chunk_new;     /* Presumably the chunk's file block after re-allocation; TODO confirm */
+    hsize_t     chunk_idx;     /* Presumably the chunk's index in the chunk index; TODO confirm */
+} H5D_chunk_alloc_info_t;
+
+/*
+ * Information for a chunk pertaining to the dataset's chunk
+ * index entry for the chunk
+ */
+typedef struct H5D_chunk_index_info_t {
+    hsize_t  chunk_idx;   /* Index of the chunk in the dataset's chunk index */
+    unsigned filter_mask; /* Bit-mask indicating which pipeline filters apply to the chunk */
+    hbool_t  need_insert; /* Whether the chunk needs re-insertion into the chunk index after a write */
+} H5D_chunk_index_info_t;
+
+/*
  * Information about a single chunk when performing collective filtered I/O. All
  * of the fields of one of these structs are initialized at the start of collective
- * filtered I/O in the function H5D__construct_filtered_io_info_list().
- *
- * This struct's fields are as follows:
- *
- *   index - The "Index" of the chunk in the dataset. The index of a chunk is used during
- *           the collective re-insertion of chunks into the chunk index after the collective
- *           I/O has been performed.
- *
- *   scaled - The scaled coordinates of the chunk in the dataset's file dataspace. The
- *            coordinates are used in both the collective re-allocation of space in the file
- *            and the collective re-insertion of chunks into the chunk index after the collective
- *            I/O has been performed.
+ * filtered I/O in the function H5D__mpio_collective_filtered_chunk_io_setup(). This
+ * struct's fields are as follows:
  *
- *   full_overwrite - A flag which determines whether or not a chunk needs to be read from the
- *                    file when being updated. If a chunk is being fully overwritten (the entire
- *                    extent is selected in its file dataspace), then it is not necessary to
- *                    read the chunk from the file. However, if the chunk is not being fully
- *                    overwritten, it has to be read from the file in order to update the chunk
- *                    without trashing the parts of the chunk that are not selected.
+ * index_info - A structure containing the information needed when collectively
+ *              re-inserting the chunk into the dataset's chunk index. The structure
+ *              is distributed to all ranks during the re-insertion operation. Its fields
+ *              are as follows:
  *
- *   num_writers - The total number of processors writing to this chunk. This field is used
- *                 when the new owner of a chunk is receiving messages, which contain selections in
- *                 the chunk and data to update the chunk with, from other processors which have this
- *                 chunk selected in the I/O operation. The new owner must know how many processors it
- *                 should expect messages from so that it can post an equal number of receive calls.
+ *     chunk_idx - The index of the chunk in the dataset's chunk index.
  *
- *   io_size - The total size of I/O to this chunk. This field is an accumulation of the size of
- *             I/O to the chunk from each processor which has the chunk selected and is used to
- *             determine the value for the previous full_overwrite flag.
+ *     filter_mask - A bit-mask that indicates which filters are to be applied to the
+ *                   chunk. Each filter in a chunk's filter pipeline has a bit position
+ *                   that can be masked to disable that particular filter for the chunk.
+ *                   This filter mask is saved alongside the chunk in the file.
  *
- *   buf - A pointer which serves the dual purpose of holding either the chunk data which is to be
- *         written to the file or the chunk data which has been read from the file.
+ *     need_insert - A flag which determines whether or not a chunk needs to be re-inserted into
+ *                   the chunk index after the write operation.
  *
- *   chunk_states - In the case of dataset writes only, this struct is used to track a chunk's size and
- *                  address in the file before and after the filtering operation has occurred.
+ * chunk_info - A pointer to the chunk's H5D_chunk_info_t structure, which contains useful
+ *              information like the dataspaces containing the selection in the chunk.
  *
- *                  Its fields are as follows:
+ * chunk_current - The address in the file and size of this chunk before the filtering
+ *                 operation. When reading a chunk from the file, this field is used to
+ *                 read the correct amount of bytes. It is also used when redistributing
+ *                 shared chunks among MPI ranks and as a parameter to the chunk file
+ *                 space reallocation function.
  *
- *                  chunk_current - The address in the file and size of this chunk before the filtering
- *                                  operation. When reading a chunk from the file, this field is used to
- *                                  read the correct amount of bytes. It is also used when redistributing
- *                                  shared chunks among processors and as a parameter to the chunk file
- *                                  space reallocation function.
+ * chunk_new - The address in the file and size of this chunk after the filtering
+ *             operation. This field is relevant when collectively re-allocating space
+ *             in the file for all of the chunks written to in the I/O operation, as
+ *             their sizes may have changed after their data has been filtered.
  *
- *                  new_chunk - The address in the file and size of this chunk after the filtering
- *                              operation. This field is relevant when collectively re-allocating space
- *                              in the file for all of the chunks written to in the I/O operation, as
- *                              their sizes may have changed after their data has been filtered.
+ * need_read - A flag which determines whether or not a chunk needs to be read from the
+ *             file. During writes, if a chunk is being fully overwritten (the entire extent
+ *             is selected in its file dataspace), then it is not necessary to read the chunk
+ *             from the file. However, if the chunk is not being fully overwritten, it has to
+ *             be read from the file in order to update the chunk without trashing the parts
+ *             of the chunk that are not selected. During reads, this field should generally
+ *             be true, but may be false if the chunk isn't allocated, for example.
  *
- *   owners - In the case of dataset writes only, this struct is used to manage which single processor
- *            will ultimately write data out to the chunk. It allows the other processors to act according
- *            to the decision and send their selection in the chunk, as well as the data they wish
- *            to update the chunk with, to the processor which is writing to the chunk.
+ * skip_filter_pline - A flag which determines whether to skip calls to the filter pipeline
+ *                     for this chunk. This flag is mostly useful for correct handling of
+ *                     partial edge chunks when the "don't filter partial edge chunks" flag
+ *                     is set on the dataset's DCPL.
  *
- *            Its fields are as follows:
+ * io_size - The total size of I/O to this chunk. This field is an accumulation of the size of
+ *           I/O to the chunk from each MPI rank which has the chunk selected and is used to
+ *           determine the value for the `need_read` flag documented above.
  *
- *            original_owner - The processor which originally had this chunk selected at the beginning of
- *                             the collective filtered I/O operation. This field is currently used when
- *                             redistributing shared chunks among processors.
+ * chunk_buf_size - The size in bytes of the data buffer allocated for the chunk
  *
- *            new_owner - The processor which has been selected to perform the write to this chunk.
+ * orig_owner - The MPI rank which originally had this chunk selected at the beginning of
+ *              the collective filtered I/O operation. This field is currently used when
+ *              redistributing shared chunks among MPI ranks.
  *
- *   async_info - In the case of dataset writes only, this struct is used by the owning processor of the
- *                chunk in order to manage the MPI send and receive calls made between it and all of
- *                the other processors which have this chunk selected in the I/O operation.
+ * new_owner - The MPI rank which has been selected to perform the modifications to this chunk.
  *
- *                Its fields are as follows:
+ * num_writers - The total number of MPI ranks writing to this chunk. This field is used when
+ *               the new owner of a chunk is receiving messages from other MPI ranks that
+ *               contain their selections in the chunk and the data to update the chunk with.
+ *               The new owner must know how many MPI ranks it should expect messages from so
+ *               that it can post an equal number of receive calls.
  *
- *                receive_requests_array - An array containing one MPI_Request for each of the
- *                                         asynchronous MPI receive calls the owning processor of this
- *                                         chunk makes to another processor in order to receive that
- *                                         processor's chunk modification data and selection in the chunk.
+ * buf - A pointer which serves the dual purpose of holding either the chunk data which is to be
+ *       written to the file or the chunk data which has been read from the file.
  *
- *                receive_buffer_array - An array of buffers into which the owning processor of this chunk
- *                                       will store chunk modification data and the selection in the chunk
- *                                       received from another processor.
+ * hh - A handle for hash tables provided by the uthash.h header
  *
- *                num_receive_requests - The number of entries in the receive_request_array and
- *                                       receive_buffer_array fields.
  */
 typedef struct H5D_filtered_collective_io_info_t {
-    hsize_t index;
-    hsize_t scaled[H5O_LAYOUT_NDIMS];
-    hbool_t full_overwrite;
-    size_t  num_writers;
-    size_t  io_size;
-    void *  buf;
-
-    struct {
-        H5F_block_t chunk_current;
-        H5F_block_t new_chunk;
-    } chunk_states;
-
-    struct {
-        int original_owner;
-        int new_owner;
-    } owners;
-
-    struct {
-        MPI_Request *   receive_requests_array;
-        unsigned char **receive_buffer_array;
-        int             num_receive_requests;
-    } async_info;
+    H5D_chunk_index_info_t index_info;
+
+    H5D_chunk_info_t *chunk_info;
+    H5F_block_t       chunk_current;
+    H5F_block_t       chunk_new;
+    hbool_t           need_read;
+    hbool_t           skip_filter_pline;
+    size_t            io_size;
+    size_t            chunk_buf_size;
+    int               orig_owner;
+    int               new_owner;
+    int               num_writers;
+    void *            buf;
+
+    UT_hash_handle hh;
 } H5D_filtered_collective_io_info_t;
 
-/* Function pointer typedef for sort function */
-typedef int (*H5D_mpio_sort_func_cb_t)(const void *, const void *);
+/*
+ * Information necessary for redistributing shared chunks during
+ * a parallel write of a chunked dataset with filters applied.
+ */
+typedef struct H5D_chunk_redistribute_info_t {
+    H5F_block_t chunk_block; /* Presumably the chunk's address and size in the file; TODO confirm */
+    hsize_t     chunk_idx;   /* Presumably the chunk's index in the chunk index; TODO confirm */
+    int         orig_owner;  /* Presumably the MPI rank that originally selected the chunk; TODO confirm */
+    int         new_owner;   /* Presumably the MPI rank chosen to modify the chunk; TODO confirm */
+    int         num_writers; /* Presumably the number of MPI ranks writing to the chunk; TODO confirm */
+} H5D_chunk_redistribute_info_t;
+
+/*
+ * Information used when re-inserting a chunk into a dataset's
+ * chunk index during a parallel write of a chunked dataset with
+ * filters applied.
+ */
+typedef struct H5D_chunk_insert_info_t {
+    H5F_block_t            chunk_block; /* Presumably the chunk's address and size in the file; TODO confirm */
+    H5D_chunk_index_info_t index_info;  /* Chunk index entry info (chunk_idx, filter_mask, need_insert) */
+} H5D_chunk_insert_info_t;
 
 /********************/
 /* Local Prototypes */
@@ -216,53 +293,98 @@ typedef int (*H5D_mpio_sort_func_cb_t)(const void *, const void *);
 static herr_t H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
                                        H5D_chunk_map_t *fm);
 static herr_t H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
-                                             H5D_chunk_map_t *fm);
+                                             H5D_chunk_map_t *fm, int mpi_rank, int mpi_size);
 static herr_t H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *        io_info,
-                                                      const H5D_type_info_t *type_info, H5D_chunk_map_t *fm);
+                                                      const H5D_type_info_t *type_info, H5D_chunk_map_t *fm,
+                                                      int mpi_rank, int mpi_size);
 static herr_t H5D__link_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
-                                            H5D_chunk_map_t *fm, int sum_chunk);
+                                            H5D_chunk_map_t *fm, int sum_chunk, int mpi_rank, int mpi_size);
 static herr_t H5D__link_chunk_filtered_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
-                                                     H5D_chunk_map_t *fm);
+                                                     H5D_chunk_map_t *fm, int mpi_rank, int mpi_size);
 static herr_t H5D__inter_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
                                        const H5S_t *file_space, const H5S_t *mem_space);
 static herr_t H5D__final_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
                                        hsize_t nelmts, MPI_Datatype mpi_file_type, MPI_Datatype mpi_buf_type);
 static herr_t H5D__sort_chunk(H5D_io_info_t *io_info, const H5D_chunk_map_t *fm,
-                              H5D_chunk_addr_info_t chunk_addr_info_array[], int many_chunk_opt);
+                              H5D_chunk_addr_info_t chunk_addr_info_array[], int many_chunk_opt, int mpi_rank,
+                              int mpi_size);
 static herr_t H5D__obtain_mpio_mode(H5D_io_info_t *io_info, H5D_chunk_map_t *fm, uint8_t assign_io_mode[],
-                                    haddr_t chunk_addr[]);
+                                    haddr_t chunk_addr[], int mpi_rank, int mpi_size);
 static herr_t H5D__mpio_get_sum_chunk(const H5D_io_info_t *io_info, const H5D_chunk_map_t *fm,
                                       int *sum_chunkf);
-static herr_t H5D__construct_filtered_io_info_list(const H5D_io_info_t *               io_info,
-                                                   const H5D_type_info_t *             type_info,
-                                                   const H5D_chunk_map_t *             fm,
-                                                   H5D_filtered_collective_io_info_t **chunk_list,
-                                                   size_t *                            num_entries);
-#if MPI_VERSION >= 3
-static herr_t H5D__chunk_redistribute_shared_chunks(const H5D_io_info_t *              io_info,
-                                                    const H5D_type_info_t *            type_info,
-                                                    const H5D_chunk_map_t *            fm,
-                                                    H5D_filtered_collective_io_info_t *local_chunk_array,
-                                                    size_t *local_chunk_array_num_entries);
-#endif
-static herr_t H5D__mpio_array_gatherv(void *local_array, size_t local_array_num_entries,
-                                      size_t array_entry_size, void **gathered_array,
-                                      size_t *gathered_array_num_entries, hbool_t allgather, int root,
-                                      MPI_Comm comm, int (*sort_func)(const void *, const void *));
-static herr_t H5D__mpio_filtered_collective_write_type(H5D_filtered_collective_io_info_t *chunk_list,
-                                                       size_t num_entries, MPI_Datatype *new_mem_type,
-                                                       hbool_t *mem_type_derived, MPI_Datatype *new_file_type,
-                                                       hbool_t *file_type_derived);
-static herr_t H5D__filtered_collective_chunk_entry_io(H5D_filtered_collective_io_info_t *chunk_entry,
-                                                      const H5D_io_info_t *              io_info,
-                                                      const H5D_type_info_t *            type_info,
-                                                      const H5D_chunk_map_t *            fm);
+static herr_t H5D__mpio_collective_filtered_chunk_io_setup(const H5D_io_info_t *               io_info,
+                                                           const H5D_type_info_t *             type_info,
+                                                           const H5D_chunk_map_t *             fm,
+                                                           H5D_filtered_collective_io_info_t **chunk_list,
+                                                           size_t *num_entries, int mpi_rank);
+static herr_t H5D__mpio_redistribute_shared_chunks(H5D_filtered_collective_io_info_t *chunk_list,
+                                                   size_t                             chunk_list_num_entries,
+                                                   const H5D_io_info_t *io_info, const H5D_chunk_map_t *fm,
+                                                   int mpi_rank, int mpi_size,
+                                                   size_t **rank_chunks_assigned_map);
+static herr_t H5D__mpio_redistribute_shared_chunks_int(H5D_filtered_collective_io_info_t *chunk_list,
+                                                       size_t *               num_chunks_assigned_map,
+                                                       hbool_t                all_ranks_involved,
+                                                       const H5D_io_info_t *  io_info,
+                                                       const H5D_chunk_map_t *fm, int mpi_rank, int mpi_size);
+static herr_t H5D__mpio_share_chunk_modification_data(H5D_filtered_collective_io_info_t *chunk_list,
+                                                      size_t *chunk_list_num_entries, H5D_io_info_t *io_info,
+                                                      const H5D_type_info_t *type_info, int mpi_rank,
+                                                      int                                 mpi_size,
+                                                      H5D_filtered_collective_io_info_t **chunk_hash_table,
+                                                      unsigned char ***                   chunk_msg_bufs,
+                                                      int *                               chunk_msg_bufs_len);
+static herr_t H5D__mpio_collective_filtered_chunk_common_io(H5D_filtered_collective_io_info_t *chunk_list,
+                                                            size_t                 chunk_list_num_entries,
+                                                            const H5D_io_info_t *  io_info,
+                                                            const H5D_type_info_t *type_info, int mpi_size);
+static herr_t H5D__mpio_collective_filtered_chunk_read(H5D_filtered_collective_io_info_t *chunk_list,
+                                                       size_t                 chunk_list_num_entries,
+                                                       const H5D_io_info_t *  io_info,
+                                                       const H5D_type_info_t *type_info, int mpi_rank,
+                                                       int mpi_size);
+static herr_t H5D__mpio_collective_filtered_chunk_update(H5D_filtered_collective_io_info_t *chunk_list,
+                                                         size_t chunk_list_num_entries,
+                                                         H5D_filtered_collective_io_info_t *chunk_hash_table,
+                                                         unsigned char **                   chunk_msg_bufs,
+                                                         int chunk_msg_bufs_len, const H5D_io_info_t *io_info,
+                                                         const H5D_type_info_t *type_info, int mpi_rank,
+                                                         int mpi_size);
+static herr_t H5D__mpio_collective_filtered_chunk_reallocate(H5D_filtered_collective_io_info_t *chunk_list,
+                                                             size_t              chunk_list_num_entries,
+                                                             size_t *            num_chunks_assigned_map,
+                                                             H5D_io_info_t *     io_info,
+                                                             H5D_chk_idx_info_t *idx_info, int mpi_rank,
+                                                             int mpi_size);
+static herr_t H5D__mpio_collective_filtered_chunk_reinsert(H5D_filtered_collective_io_info_t *chunk_list,
+                                                           size_t              chunk_list_num_entries,
+                                                           size_t *            num_chunks_assigned_map,
+                                                           H5D_io_info_t *     io_info,
+                                                           H5D_chk_idx_info_t *idx_info, int mpi_rank,
+                                                           int mpi_size);
+static herr_t H5D__mpio_get_chunk_redistribute_info_types(MPI_Datatype *contig_type,
+                                                          hbool_t *     contig_type_derived,
+                                                          MPI_Datatype *resized_type,
+                                                          hbool_t *     resized_type_derived);
+static herr_t H5D__mpio_get_chunk_alloc_info_types(MPI_Datatype *contig_type, hbool_t *contig_type_derived,
+                                                   MPI_Datatype *resized_type, hbool_t *resized_type_derived);
+static herr_t H5D__mpio_get_chunk_insert_info_types(MPI_Datatype *contig_type, hbool_t *contig_type_derived,
+                                                    MPI_Datatype *resized_type,
+                                                    hbool_t *     resized_type_derived);
+static herr_t H5D__mpio_collective_filtered_io_type(H5D_filtered_collective_io_info_t *chunk_list,
+                                                    size_t num_entries, H5D_io_op_type_t op_type,
+                                                    MPI_Datatype *new_mem_type, hbool_t *mem_type_derived,
+                                                    MPI_Datatype *new_file_type, hbool_t *file_type_derived);
 static int    H5D__cmp_chunk_addr(const void *chunk_addr_info1, const void *chunk_addr_info2);
 static int    H5D__cmp_filtered_collective_io_info_entry(const void *filtered_collective_io_info_entry1,
                                                          const void *filtered_collective_io_info_entry2);
-#if MPI_VERSION >= 3
-static int H5D__cmp_filtered_collective_io_info_entry_owner(const void *filtered_collective_io_info_entry1,
-                                                            const void *filtered_collective_io_info_entry2);
+static int    H5D__cmp_chunk_redistribute_info(const void *entry1, const void *entry2);
+static int    H5D__cmp_chunk_redistribute_info_orig_owner(const void *entry1, const void *entry2);
+
+#ifdef H5Dmpio_DEBUG
+static herr_t H5D__mpio_debug_init(void);
+static herr_t H5D__mpio_dump_collective_filtered_chunk_list(H5D_filtered_collective_io_info_t *chunk_list,
+                                                            size_t chunk_list_num_entries, int mpi_rank);
 #endif
 
 /*********************/
@@ -273,6 +395,188 @@ static int H5D__cmp_filtered_collective_io_info_entry_owner(const void *filtered
 /* Local Variables */
 /*******************/
 
+/* Declare extern free list to manage the H5S_sel_iter_t struct */
+H5FL_EXTERN(H5S_sel_iter_t);
+
+#ifdef H5Dmpio_DEBUG
+
+/* Flags to control debug actions in this file.
+ * (Meant to be indexed by characters)
+ *
+ * These flags can be set with either (or both) the environment variable
+ *      "H5D_mpio_Debug" set to a string containing one or more characters
+ *      (flags) or by setting them as a string value for the
+ *      "H5D_mpio_debug_key" MPI Info key.
+ *
+ * Supported characters in 'H5D_mpio_Debug' string:
+ *      't' trace function entry and exit
+ *      'f' log to file rather than debugging stream
+ *      'm' show (rough) memory usage statistics
+ *      'c' show critical timing information
+ *
+ *      To only show output from a particular MPI rank, specify its rank
+ *      number as a character, e.g.:
+ *
+ *      '0' only show output from rank 0
+ *
+ *      To only show output from a particular range (up to 8 ranks supported
+ *      between 0-9) of MPI ranks, specify the start and end ranks separated
+ *      by a hyphen, e.g.:
+ *
+ *      '0-7' only show output from ranks 0 through 7
+ *
+ */
+static int               H5D_mpio_debug_flags_s[256];
+static int               H5D_mpio_debug_rank_s[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+static hbool_t           H5D_mpio_debug_inited    = FALSE;
+static const char *const trace_in_pre             = "-> ";
+static const char *const trace_out_pre            = "<- ";
+static int               debug_indent             = 0;
+static FILE *            debug_stream             = NULL;
+
+/* Determine if this rank should output debugging info */
+#define H5D_MPIO_DEBUG_THIS_RANK(rank)                                                                       \
+    (H5D_mpio_debug_rank_s[0] < 0 || rank == H5D_mpio_debug_rank_s[0] || rank == H5D_mpio_debug_rank_s[1] || \
+     rank == H5D_mpio_debug_rank_s[2] || rank == H5D_mpio_debug_rank_s[3] ||                                 \
+     rank == H5D_mpio_debug_rank_s[4] || rank == H5D_mpio_debug_rank_s[5] ||                                 \
+     rank == H5D_mpio_debug_rank_s[6] || rank == H5D_mpio_debug_rank_s[7])
+
+/* Print some debugging string */
+#define H5D_MPIO_DEBUG(rank, string)                                                                         \
+    do {                                                                                                     \
+        if (debug_stream && H5D_MPIO_DEBUG_THIS_RANK(rank)) {                                                \
+            HDfprintf(debug_stream, "%*s(Rank %d) " string "\n", debug_indent, "", rank);                    \
+            fflush(debug_stream);                                                                            \
+        }                                                                                                    \
+    } while (0)
+
+/* Print some debugging string with printf-style arguments */
+#define H5D_MPIO_DEBUG_VA(rank, string, ...)                                                                 \
+    do {                                                                                                     \
+        if (debug_stream && H5D_MPIO_DEBUG_THIS_RANK(rank)) {                                                \
+            HDfprintf(debug_stream, "%*s(Rank %d) " string "\n", debug_indent, "", rank, __VA_ARGS__);       \
+            fflush(debug_stream);                                                                            \
+        }                                                                                                    \
+    } while (0)
+
+#define H5D_MPIO_TRACE_ENTER(rank)                                                                           \
+    do {                                                                                                     \
+        hbool_t trace_flag = H5D_mpio_debug_flags_s[(int)'t'];                                               \
+                                                                                                             \
+        if (trace_flag) {                                                                                    \
+            H5D_MPIO_DEBUG_VA(rank, "%s%s", trace_in_pre, __func__);                                         \
+            debug_indent += (int)strlen(trace_in_pre);                                                       \
+        }                                                                                                    \
+    } while (0)
+
+#define H5D_MPIO_TRACE_EXIT(rank)                                                                            \
+    do {                                                                                                     \
+        hbool_t trace_flag = H5D_mpio_debug_flags_s[(int)'t'];                                               \
+                                                                                                             \
+        if (trace_flag) {                                                                                    \
+            debug_indent -= (int)strlen(trace_out_pre);                                                      \
+            H5D_MPIO_DEBUG_VA(rank, "%s%s", trace_out_pre, __func__);                                        \
+        }                                                                                                    \
+    } while (0)
+
+#define H5D_MPIO_TIME_START(rank, op_name)                                                                   \
+    {                                                                                                        \
+        hbool_t           time_flag  = H5D_mpio_debug_flags_s[(int)'c'];                                     \
+        double            start_time = 0.0, end_time = 0.0;                                                  \
+        const char *const op = op_name;                                                                      \
+                                                                                                             \
+        if (time_flag) {                                                                                     \
+            start_time = MPI_Wtime();                                                                        \
+        }
+
+#define H5D_MPIO_TIME_STOP(rank)                                                                             \
+    if (time_flag) {                                                                                         \
+        end_time = MPI_Wtime();                                                                              \
+        H5D_MPIO_DEBUG_VA(rank, "'%s' took %f seconds", op, (end_time - start_time));                        \
+    }                                                                                                        \
+    }
+
+/*---------------------------------------------------------------------------
+ * Function:    H5D__mpio_parse_debug_str
+ *
+ * Purpose:     Parse a string for H5Dmpio-related debugging flags: a digit
+ *              (or "N-M" digit range) selects ranks; other chars set flags
+ *
+ * Returns:     N/A
+ *---------------------------------------------------------------------------
+ */
+static void
+H5D__mpio_parse_debug_str(const char *s)
+{
+    FUNC_ENTER_STATIC_NOERR
+
+    HDassert(s);
+
+    while (*s) {
+        /* Convert via unsigned char so that 'c' is always a valid (0-255) flag index */
+        int c = (int)(unsigned char)(*s);
+
+        if (c >= (int)'0' && c <= (int)'9') {
+            hbool_t range = FALSE;
+
+            if (*(s + 1) && *(s + 2))
+                range = (int)*(s + 1) == '-' && (int)*(s + 2) >= (int)'0' && (int)*(s + 2) <= (int)'9';
+
+            if (range) {
+                int start_rank = c - (int)'0';
+                int end_rank   = (int)*(s + 2) - '0';
+                int num_ranks  = end_rank - start_rank + 1;
+                int i;
+
+                if (num_ranks > 8)
+                    num_ranks = 8;
+
+                for (i = 0; i < num_ranks; i++)
+                    H5D_mpio_debug_rank_s[i] = start_rank++;
+
+                /* Skip "<digit>-" only; the s++ below steps past the range's final digit */
+                s += 2;
+            }
+            else
+                H5D_mpio_debug_rank_s[0] = c - (int)'0';
+        }
+        else
+            H5D_mpio_debug_flags_s[c]++;
+
+        s++;
+    }
+
+    FUNC_LEAVE_NOAPI_VOID
+}
+
+static herr_t
+H5D__mpio_debug_init(void)
+{
+    const char *debug_str;           /* Value of the "H5D_mpio_Debug" environment variable, if any */
+    herr_t      ret_value = SUCCEED; /* Never changed below, so this routine always returns SUCCEED */
+
+    FUNC_ENTER_STATIC_NOERR
+
+    HDassert(!H5D_mpio_debug_inited); /* Expected to be called only while not yet initialized */
+
+    /* Clear the debug flag buffer */
+    HDmemset(H5D_mpio_debug_flags_s, 0, sizeof(H5D_mpio_debug_flags_s));
+
+    /* Retrieve the debug string from the environment and parse it into flags and rank filters */
+    debug_str = HDgetenv("H5D_mpio_Debug");
+    if (debug_str)
+        H5D__mpio_parse_debug_str(debug_str);
+
+    if (H5DEBUG(D))
+        debug_stream = H5DEBUG(D); /* Otherwise debug_stream keeps its prior value (NULL initially) */
+
+    H5D_mpio_debug_inited = TRUE;
+
+    FUNC_LEAVE_NOAPI(ret_value)
+}
+
+#endif
+
 /*-------------------------------------------------------------------------
  * Function:    H5D__mpio_opt_possible
  *
@@ -347,14 +651,9 @@ H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space, co
          *  use collective IO will defer until each chunk IO is reached.
          */
 
-#if MPI_VERSION < 3
-    /*
-     * Don't allow parallel writes to filtered datasets if the MPI version
-     * is less than 3. The functions needed (MPI_Mprobe and MPI_Imrecv) will
-     * not be available.
-     */
-    if (io_info->op_type == H5D_IO_OP_WRITE && io_info->dset->shared->layout.type == H5D_CHUNKED &&
-        io_info->dset->shared->dcpl_cache.pline.nused > 0)
+#ifndef H5_HAVE_PARALLEL_FILTERED_WRITES
+    /* Don't allow writes to filtered datasets if the functionality is disabled */
+    if (io_info->op_type == H5D_IO_OP_WRITE && io_info->dset->shared->dcpl_cache.pline.nused > 0)
         local_cause[0] |= H5D_MPIO_PARALLEL_FILTERED_WRITES_DISABLED;
 #endif
 
@@ -437,6 +736,150 @@ done:
 } /* H5D__mpio_opt_possible() */
 
 /*-------------------------------------------------------------------------
+ * Function:    H5D__mpio_get_no_coll_cause_strings
+ *
+ * Purpose:     When collective I/O is broken internally, it can be useful
+ *              for users to see a representative string for the reason(s)
+ *              why it was broken. This routine inspects the current
+ *              "cause" flags from the API context and writes "; "-separated,
+ *              NUL-terminated messages (truncated to fit) into the caller's
+ *              buffers for the local and global reasons.
+ *
+ * Return:      Non-negative on success/Negative on failure
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5D__mpio_get_no_coll_cause_strings(char *local_cause, size_t local_cause_len, char *global_cause,
+                                    size_t global_cause_len)
+{
+    uint32_t local_no_coll_cause;
+    uint32_t global_no_coll_cause;
+    size_t   local_cause_bytes_written  = 0;
+    size_t   global_cause_bytes_written = 0;
+    int      nbits;
+    herr_t   ret_value = SUCCEED;
+
+    FUNC_ENTER_PACKAGE
+
+    HDassert((local_cause && local_cause_len > 0) || (global_cause && global_cause_len > 0));
+
+    /*
+     * Use compile-time assertion so this routine is updated
+     * when any new "no collective cause" values are added
+     */
+    HDcompile_assert(H5D_MPIO_NO_COLLECTIVE_MAX_CAUSE == (H5D_mpio_no_collective_cause_t)256);
+
+    /* Initialize output buffers, leaving any zero-length buffer untouched */
+    if (local_cause && local_cause_len > 0)
+        *local_cause = '\0';
+    if (global_cause && global_cause_len > 0)
+        *global_cause = '\0';
+
+    /* Retrieve the local and global cause flags from the API context */
+    if (H5CX_get_mpio_local_no_coll_cause(&local_no_coll_cause) < 0)
+        HGOTO_ERROR(H5E_CONTEXT, H5E_CANTGET, FAIL, "unable to get local no collective cause value")
+    if (H5CX_get_mpio_global_no_coll_cause(&global_no_coll_cause) < 0)
+        HGOTO_ERROR(H5E_CONTEXT, H5E_CANTGET, FAIL, "unable to get global no collective cause value")
+
+    /*
+     * Append each of the "reason for breaking collective I/O"
+     * error messages to the local and global cause string buffers
+     */
+    nbits = (int)(8 * sizeof(local_no_coll_cause));
+    for (int bit_pos = 0; bit_pos < nbits; bit_pos++) {
+        H5D_mpio_no_collective_cause_t cur_cause;
+        const char *                   cause_str = "invalid or unknown no-collective cause reason";
+        size_t                         buf_space_left;
+
+        cur_cause = (H5D_mpio_no_collective_cause_t)(1 << bit_pos);
+        if (cur_cause == H5D_MPIO_NO_COLLECTIVE_MAX_CAUSE)
+            break;
+
+        switch (cur_cause) {
+            case H5D_MPIO_SET_INDEPENDENT:
+                cause_str = "independent I/O was requested";
+                break;
+            case H5D_MPIO_DATATYPE_CONVERSION:
+                cause_str = "datatype conversions were required";
+                break;
+            case H5D_MPIO_DATA_TRANSFORMS:
+                cause_str = "data transforms needed to be applied";
+                break;
+            case H5D_MPIO_MPI_OPT_TYPES_ENV_VAR_DISABLED:
+                cause_str = "optimized MPI types flag wasn't set";
+                break;
+            case H5D_MPIO_NOT_SIMPLE_OR_SCALAR_DATASPACES:
+                cause_str = "one of the dataspaces was neither simple nor scalar";
+                break;
+            case H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET:
+                cause_str = "dataset was not contiguous or chunked";
+                break;
+            case H5D_MPIO_PARALLEL_FILTERED_WRITES_DISABLED:
+                cause_str = "parallel writes to filtered datasets are disabled";
+                break;
+            case H5D_MPIO_ERROR_WHILE_CHECKING_COLLECTIVE_POSSIBLE:
+                cause_str = "an error occurred while checking if collective I/O was possible";
+                break;
+            case H5D_MPIO_COLLECTIVE:
+            case H5D_MPIO_NO_COLLECTIVE_MAX_CAUSE:
+            default:
+                HDassert(0 && "invalid no collective cause reason");
+                break;
+        }
+
+        /*
+         * If the current cause is among the local reasons, append its
+         * message, always keeping one byte free for the NUL terminator
+         */
+        if (local_cause && local_cause_len > 0 && (cur_cause & local_no_coll_cause)) {
+            buf_space_left = local_cause_len - local_cause_bytes_written - 1;
+
+            /*
+             * Check if there were any previous error messages included. If
+             * so, prepend a semicolon to separate the messages.
+             */
+            if (buf_space_left && local_cause_bytes_written) {
+                HDstrncat(local_cause, "; ", buf_space_left);
+                local_cause_bytes_written += MIN(buf_space_left, 2);
+                buf_space_left -= MIN(buf_space_left, 2);
+            }
+
+            if (buf_space_left) {
+                HDstrncat(local_cause, cause_str, buf_space_left);
+                local_cause_bytes_written += MIN(buf_space_left, HDstrlen(cause_str));
+            }
+        }
+
+        /*
+         * If the current cause is among the global reasons, append its
+         * message, always keeping one byte free for the NUL terminator
+         */
+        if (global_cause && global_cause_len > 0 && (cur_cause & global_no_coll_cause)) {
+            buf_space_left = global_cause_len - global_cause_bytes_written - 1;
+
+            /*
+             * Check if there were any previous error messages included. If
+             * so, prepend a semicolon to separate the messages.
+             */
+            if (buf_space_left && global_cause_bytes_written) {
+                HDstrncat(global_cause, "; ", buf_space_left);
+                global_cause_bytes_written += MIN(buf_space_left, 2);
+                buf_space_left -= MIN(buf_space_left, 2);
+            }
+
+            if (buf_space_left) {
+                HDstrncat(global_cause, cause_str, buf_space_left);
+                global_cause_bytes_written += MIN(buf_space_left, HDstrlen(cause_str));
+            }
+        }
+    }
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5D__mpio_get_no_coll_cause_strings() */
+
+/*-------------------------------------------------------------------------
  * Function:    H5D__mpio_select_read
  *
  * Purpose:     MPI-IO function to read directly from app buffer to file.
@@ -500,145 +943,6 @@ done:
 } /* end H5D__mpio_select_write() */
 
 /*-------------------------------------------------------------------------
- * Function:    H5D__mpio_array_gatherv
- *
- * Purpose:     Given an array, specified in local_array, by each processor
- *              calling this function, collects each array into a single
- *              array which is then either gathered to the processor
- *              specified by root, when allgather is false, or is
- *              distributed back to all processors when allgather is true.
- *
- *              The number of entries in the array contributed by an
- *              individual processor and the size of each entry should be
- *              specified in local_array_num_entries and array_entry_size,
- *              respectively.
- *
- *              The MPI communicator to use should be specified for comm.
- *
- *              If the sort_func argument is supplied, the array is sorted
- *              before the function returns.
- *
- *              Note: if allgather is specified as true, root is ignored.
- *
- * Return:      Non-negative on success/Negative on failure
- *
- * Programmer:  Jordan Henderson
- *              Sunday, April 9th, 2017
- *
- *-------------------------------------------------------------------------
- */
-static herr_t
-H5D__mpio_array_gatherv(void *local_array, size_t local_array_num_entries, size_t array_entry_size,
-                        void **_gathered_array, size_t *_gathered_array_num_entries, hbool_t allgather,
-                        int root, MPI_Comm comm, H5D_mpio_sort_func_cb_t sort_func)
-{
-    size_t gathered_array_num_entries = 0;    /* The size of the newly-constructed array */
-    void * gathered_array             = NULL; /* The newly-constructed array returned to the caller */
-    int *receive_counts_array = NULL; /* Array containing number of entries each processor is contributing */
-    int *displacements_array =
-        NULL; /* Array of displacements where each processor places its data in the final array */
-    int    mpi_code, mpi_rank, mpi_size;
-    int    sendcount;
-    herr_t ret_value = SUCCEED;
-
-    FUNC_ENTER_STATIC
-
-    HDassert(_gathered_array);
-    HDassert(_gathered_array_num_entries);
-
-    MPI_Comm_size(comm, &mpi_size);
-    MPI_Comm_rank(comm, &mpi_rank);
-
-    /* Determine the size of the end result array by collecting the number
-     * of entries contributed by each processor into a single total.
-     */
-    if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&local_array_num_entries, &gathered_array_num_entries, 1,
-                                                 MPI_INT, MPI_SUM, comm)))
-        HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code)
-
-    /* If 0 entries resulted from the collective operation, no processor is contributing anything and there is
-     * nothing to do */
-    if (gathered_array_num_entries > 0) {
-        /*
-         * If gathering to all processors, all processors need to allocate space for the resulting array, as
-         * well as the receive counts and displacements arrays for the collective MPI_Allgatherv call.
-         * Otherwise, only the root processor needs to allocate the space for an MPI_Gatherv call.
-         */
-        if (allgather || (mpi_rank == root)) {
-            if (NULL == (gathered_array = H5MM_malloc(gathered_array_num_entries * array_entry_size)))
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate gathered array")
-
-            if (NULL == (receive_counts_array = (int *)H5MM_malloc((size_t)mpi_size * sizeof(int))))
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate receive counts array")
-
-            if (NULL == (displacements_array = (int *)H5MM_malloc((size_t)mpi_size * sizeof(int))))
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate receive displacements array")
-        } /* end if */
-
-        /*
-         * If gathering to all processors, inform each processor of how many entries each other processor is
-         * contributing to the resulting array by collecting the counts into each processor's "receive counts"
-         * array. Otherwise, inform only the root processor of how many entries each other processor is
-         * contributing.
-         */
-        if (allgather) {
-            if (MPI_SUCCESS != (mpi_code = MPI_Allgather(&local_array_num_entries, 1, MPI_INT,
-                                                         receive_counts_array, 1, MPI_INT, comm)))
-                HMPI_GOTO_ERROR(FAIL, "MPI_Allgather failed", mpi_code)
-        } /* end if */
-        else {
-            if (MPI_SUCCESS != (mpi_code = MPI_Gather(&local_array_num_entries, 1, MPI_INT,
-                                                      receive_counts_array, 1, MPI_INT, root, comm)))
-                HMPI_GOTO_ERROR(FAIL, "MPI_Gather failed", mpi_code)
-        } /* end else */
-
-        if (allgather || (mpi_rank == root)) {
-            size_t i;
-
-            /* Multiply each receive count by the size of the array entry, since the data is sent as bytes. */
-            for (i = 0; i < (size_t)mpi_size; i++)
-                H5_CHECKED_ASSIGN(receive_counts_array[i], int,
-                                  (size_t)receive_counts_array[i] * array_entry_size, size_t);
-
-            /* Set receive buffer offsets for the collective MPI_Allgatherv/MPI_Gatherv call. */
-            displacements_array[0] = 0;
-            for (i = 1; i < (size_t)mpi_size; i++)
-                displacements_array[i] = displacements_array[i - 1] + receive_counts_array[i - 1];
-        } /* end if */
-
-        /* As the data is sent as bytes, calculate the true sendcount for the data. */
-        H5_CHECKED_ASSIGN(sendcount, int, local_array_num_entries *array_entry_size, size_t);
-
-        if (allgather) {
-            if (MPI_SUCCESS !=
-                (mpi_code = MPI_Allgatherv(local_array, sendcount, MPI_BYTE, gathered_array,
-                                           receive_counts_array, displacements_array, MPI_BYTE, comm)))
-                HMPI_GOTO_ERROR(FAIL, "MPI_Allgatherv failed", mpi_code)
-        } /* end if */
-        else {
-            if (MPI_SUCCESS !=
-                (mpi_code = MPI_Gatherv(local_array, sendcount, MPI_BYTE, gathered_array,
-                                        receive_counts_array, displacements_array, MPI_BYTE, root, comm)))
-                HMPI_GOTO_ERROR(FAIL, "MPI_Gatherv failed", mpi_code)
-        } /* end else */
-
-        if (sort_func && (allgather || (mpi_rank == root)))
-            HDqsort(gathered_array, gathered_array_num_entries, array_entry_size, sort_func);
-    } /* end if */
-
-    *_gathered_array             = gathered_array;
-    *_gathered_array_num_entries = gathered_array_num_entries;
-
-done:
-    if (receive_counts_array)
-        H5MM_free(receive_counts_array);
-    if (displacements_array)
-        H5MM_free(displacements_array);
-
-    FUNC_LEAVE_NOAPI(ret_value)
-} /* end H5D__mpio_array_gatherv() */
-
-/*-------------------------------------------------------------------------
  * Function:    H5D__mpio_get_sum_chunk
  *
  * Purpose:     Routine for obtaining total number of chunks to cover
@@ -793,11 +1097,17 @@ static herr_t
 H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, H5D_chunk_map_t *fm)
 {
     H5FD_mpio_chunk_opt_t chunk_opt_mode;
-    int                   io_option = H5D_MULTI_CHUNK_IO_MORE_OPT;
-    int                   sum_chunk = -1;
+#ifdef H5Dmpio_DEBUG
+    hbool_t log_file_flag  = FALSE;
+    FILE *  debug_log_file = NULL;
+#endif
 #ifdef H5_HAVE_INSTRUMENTED_LIBRARY
     htri_t temp_not_link_io = FALSE;
 #endif
+    int    io_option = H5D_MULTI_CHUNK_IO_MORE_OPT;
+    int    sum_chunk = -1;
+    int    mpi_rank;
+    int    mpi_size;
     herr_t ret_value = SUCCEED;
 
     FUNC_ENTER_STATIC
@@ -808,6 +1118,36 @@ H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf
     HDassert(type_info);
     HDassert(fm);
 
+    /* Obtain the current rank of the process and the number of ranks */
+    if ((mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file)) < 0)
+        HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain MPI rank")
+    if ((mpi_size = H5F_mpi_get_size(io_info->dset->oloc.file)) < 0)
+        HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain MPI size")
+
+#ifdef H5Dmpio_DEBUG
+    /* Initialize file-level debugging if not initialized */
+    if (!H5D_mpio_debug_inited && H5D__mpio_debug_init() < 0)
+        HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't initialize H5Dmpio debugging")
+
+    /* Open file for debugging if necessary */
+    log_file_flag = H5D_mpio_debug_flags_s[(int)'f'];
+    if (log_file_flag) {
+        char   debug_log_filename[1024];
+        time_t time_now;
+
+        HDsnprintf(debug_log_filename, 1024, "H5Dmpio_debug.rank%d", mpi_rank);
+
+        if (NULL == (debug_log_file = HDfopen(debug_log_filename, "a")))
+            HGOTO_ERROR(H5E_IO, H5E_OPENERROR, FAIL, "couldn't open debugging log file")
+
+        /* Print a short header for this I/O operation */
+        time_now = time(NULL);
+        HDfprintf(debug_log_file, "##### %s", asctime(localtime(&time_now)));
+
+        debug_stream = debug_log_file;
+    }
+#endif
+
     /* Check the optional property list for the collective chunk IO optimization option */
     if (H5CX_get_mpio_chunk_opt_mode(&chunk_opt_mode) < 0)
         HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "couldn't get chunk optimization option")
@@ -820,13 +1160,10 @@ H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf
     /* via default path. branch by num threshold */
     else {
         unsigned one_link_chunk_io_threshold; /* Threshold to use single collective I/O for all chunks */
-        int      mpi_size;                    /* Number of processes in MPI job */
 
         if (H5D__mpio_get_sum_chunk(io_info, fm, &sum_chunk) < 0)
             HGOTO_ERROR(H5E_DATASPACE, H5E_CANTSWAP, FAIL,
                         "unable to obtain the total chunk number of all processes");
-        if ((mpi_size = H5F_mpi_get_size(io_info->dset->oloc.file)) < 0)
-            HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi size")
 
         /* Get the chunk optimization option threshold */
         if (H5CX_get_mpio_chunk_opt_num(&one_link_chunk_io_threshold) < 0)
@@ -872,22 +1209,12 @@ H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf
         case H5D_ONE_LINK_CHUNK_IO_MORE_OPT:
             /* Check if there are any filters in the pipeline */
             if (io_info->dset->shared->dcpl_cache.pline.nused > 0) {
-                /* For now, Multi-chunk IO must be forced for parallel filtered read,
-                 * so that data can be unfiltered as it is received. There is significant
-                 * complexity in unfiltering the data when it is read all at once into a
-                 * single buffer.
-                 */
-                if (io_info->op_type == H5D_IO_OP_READ) {
-                    if (H5D__multi_chunk_filtered_collective_io(io_info, type_info, fm) < 0)
-                        HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL,
-                                    "couldn't finish optimized multiple filtered chunk MPI-IO")
-                } /* end if */
-                else if (H5D__link_chunk_filtered_collective_io(io_info, type_info, fm) < 0)
+                if (H5D__link_chunk_filtered_collective_io(io_info, type_info, fm, mpi_rank, mpi_size) < 0)
                     HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "couldn't finish filtered linked chunk MPI-IO")
             } /* end if */
             else
                 /* Perform unfiltered link chunk collective IO */
-                if (H5D__link_chunk_collective_io(io_info, type_info, fm, sum_chunk) < 0)
+                if (H5D__link_chunk_collective_io(io_info, type_info, fm, sum_chunk, mpi_rank, mpi_size) < 0)
                 HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "couldn't finish linked chunk MPI-IO")
             break;
 
@@ -895,18 +1222,28 @@ H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf
         default:                 /* multiple chunk IO via threshold */
             /* Check if there are any filters in the pipeline */
             if (io_info->dset->shared->dcpl_cache.pline.nused > 0) {
-                if (H5D__multi_chunk_filtered_collective_io(io_info, type_info, fm) < 0)
+                if (H5D__multi_chunk_filtered_collective_io(io_info, type_info, fm, mpi_rank, mpi_size) < 0)
                     HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL,
                                 "couldn't finish optimized multiple filtered chunk MPI-IO")
             } /* end if */
             else
                 /* Perform unfiltered multi chunk collective IO */
-                if (H5D__multi_chunk_collective_io(io_info, type_info, fm) < 0)
+                if (H5D__multi_chunk_collective_io(io_info, type_info, fm, mpi_rank, mpi_size) < 0)
                 HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "couldn't finish optimized multiple chunk MPI-IO")
             break;
     } /* end switch */
 
 done:
+#ifdef H5Dmpio_DEBUG
+    /* Close debugging log file */
+    if (debug_log_file) {
+        HDfprintf(debug_log_file, "##############\n\n");
+        if (EOF == HDfclose(debug_log_file))
+            HDONE_ERROR(H5E_IO, H5E_CLOSEERROR, FAIL, "couldn't close debugging log file")
+        debug_stream = H5DEBUG(D);
+    }
+#endif
+
     FUNC_LEAVE_NOAPI(ret_value)
 } /* end H5D__chunk_collective_io */
 
@@ -989,7 +1326,7 @@ done:
  */
 static herr_t
 H5D__link_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, H5D_chunk_map_t *fm,
-                              int sum_chunk)
+                              int sum_chunk, int mpi_rank, int mpi_size)
 {
     H5D_chunk_addr_info_t *chunk_addr_info_array = NULL;
     MPI_Datatype           chunk_final_mtype; /* Final memory MPI datatype for all chunks with selection */
@@ -1070,9 +1407,8 @@ H5D__link_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *typ
         /* Set up the base storage address for this chunk */
         io_info->store = &ctg_store;
 
-#ifdef H5D_DEBUG
-        if (H5DEBUG(D))
-            HDfprintf(H5DEBUG(D), "before inter_collective_io for total chunk = 1 \n");
+#ifdef H5Dmpio_DEBUG
+        H5D_MPIO_DEBUG(mpi_rank, "before inter_collective_io for total chunk = 1");
 #endif
 
         /* Perform I/O */
@@ -1088,9 +1424,8 @@ H5D__link_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *typ
         num_chunk = H5SL_count(fm->sel_chunks);
         H5_CHECK_OVERFLOW(num_chunk, size_t, int);
 
-#ifdef H5D_DEBUG
-        if (H5DEBUG(D))
-            HDfprintf(H5DEBUG(D), "total_chunks = %zu, num_chunk = %zu\n", total_chunks, num_chunk);
+#ifdef H5Dmpio_DEBUG
+        H5D_MPIO_DEBUG_VA(mpi_rank, "total_chunks = %zu, num_chunk = %zu", total_chunks, num_chunk);
 #endif
 
         /* Set up MPI datatype for chunks selected */
@@ -1121,18 +1456,17 @@ H5D__link_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *typ
                 HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL,
                             "couldn't allocate chunk file is derived datatype flags buffer")
 
-#ifdef H5D_DEBUG
-            if (H5DEBUG(D))
-                HDfprintf(H5DEBUG(D), "before sorting the chunk address \n");
+#ifdef H5Dmpio_DEBUG
+            H5D_MPIO_DEBUG(mpi_rank, "before sorting chunk addresses");
 #endif
+
             /* Sort the chunk address */
-            if (H5D__sort_chunk(io_info, fm, chunk_addr_info_array, sum_chunk) < 0)
+            if (H5D__sort_chunk(io_info, fm, chunk_addr_info_array, sum_chunk, mpi_rank, mpi_size) < 0)
                 HGOTO_ERROR(H5E_DATASPACE, H5E_CANTSWAP, FAIL, "unable to sort chunk address")
             ctg_store.contig.dset_addr = chunk_addr_info_array[0].chunk_addr;
 
-#ifdef H5D_DEBUG
-            if (H5DEBUG(D))
-                HDfprintf(H5DEBUG(D), "after sorting the chunk address \n");
+#ifdef H5Dmpio_DEBUG
+            H5D_MPIO_DEBUG(mpi_rank, "after sorting chunk addresses");
 #endif
 
             /* Obtain MPI derived datatype from all individual chunks */
@@ -1237,9 +1571,9 @@ H5D__link_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *typ
             /* No chunks selected for this process */
             mpi_buf_count = (hsize_t)0;
         } /* end else */
-#ifdef H5D_DEBUG
-        if (H5DEBUG(D))
-            HDfprintf(H5DEBUG(D), "before coming to final collective IO\n");
+
+#ifdef H5Dmpio_DEBUG
+        H5D_MPIO_DEBUG(mpi_rank, "before coming to final collective I/O");
 #endif
 
         /* Set up the base storage address for this chunk */
@@ -1252,11 +1586,11 @@ H5D__link_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *typ
     } /* end else */
 
 done:
-#ifdef H5D_DEBUG
-    if (H5DEBUG(D))
-        HDfprintf(H5DEBUG(D), "before freeing memory inside H5D_link_collective_io ret_value = %d\n",
-                  ret_value);
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_DEBUG_VA(mpi_rank, "before freeing memory inside H5D_link_collective_io ret_value = %d",
+                      ret_value);
 #endif
+
     /* Release resources */
     if (chunk_addr_info_array)
         H5MM_xfree(chunk_addr_info_array);
@@ -1289,68 +1623,89 @@ done:
 /*-------------------------------------------------------------------------
  * Function:    H5D__link_chunk_filtered_collective_io
  *
- * Purpose:     Routine for one collective IO with one MPI derived datatype
- *              to link with all filtered chunks
- *
- *              1. Construct a list of selected chunks in the collective IO
- *                 operation
- *                 A. If any chunk is being written to by more than 1
- *                    process, the process writing to the chunk which
- *                    currently has the least amount of chunks assigned
- *                    to it becomes the new owner (in the case of ties,
- *                    the lowest MPI rank becomes the new owner)
- *              2. If the operation is a write operation
- *                 A. Loop through each chunk in the operation
- *                    I. If this is not a full overwrite of the chunk
- *                       a) Read the chunk from file and pass the chunk
- *                          through the filter pipeline in reverse order
- *                          (Unfilter the chunk)
+ * Purpose:     Performs collective I/O on filtered chunks by creating a
+ *              single MPI derived datatype to link with all filtered
+ *              chunks. The general algorithm is as follows:
+ *
+ *              1. Construct a list of selected chunks in the collective
+ *                 I/O operation
+ *              2. If the operation is a read operation
+ *                 A. Ensure that the list of chunks is sorted in
+ *                    monotonically non-decreasing order of chunk offset
+ *                    in the file
+ *                 B. Participate in a collective read of chunks from
+ *                    the file
+ *                 C. Loop through each selected chunk, unfiltering it and
+ *                    scattering the data to the application's read buffer
+ *              3. If the operation is a write operation
+ *                 A. Redistribute any chunks being written by more than 1
+ *                    MPI rank, such that the chunk is only owned by 1 MPI
+ *                    rank. The rank writing to the chunk which currently
+ *                    has the fewest chunks assigned to it becomes
+ *                    the new owner (in the case of ties, the lowest MPI
+ *                    rank becomes the new owner)
+ *                 B. Participate in a collective read of chunks from the
+ *                    file
+ *                 C. Loop through each chunk selected in the operation
+ *                    and for each chunk:
+ *                    I. If we actually read the chunk from the file (if
+ *                       a chunk is being fully overwritten, we skip
+ *                       reading it), pass the chunk through the filter
+ *                       pipeline in reverse order (unfilter the chunk)
  *                    II. Update the chunk data with the modifications from
- *                        the owning process
+ *                        the owning MPI rank
  *                    III. Receive any modification data from other
- *                         processes and update the chunk data with these
+ *                         ranks and update the chunk data with those
  *                         modifications
  *                    IV. Filter the chunk
- *                 B. Contribute the modified chunks to an array gathered
- *                    by all processes which contains the new sizes of
- *                    every chunk modified in the collective IO operation
- *                 C. All processes collectively re-allocate each chunk
- *                    from the gathered array with their new sizes after
- *                    the filter operation
- *                 D. If this process has any chunks selected in the IO
- *                    operation, create an MPI derived type for memory and
- *                    file to write out the process' selected chunks to the
- *                    file
- *                 E. Perform the collective write
- *                 F. All processes collectively re-insert each modified
+ *                 D. Contribute the modified chunks to an array gathered
+ *                    by all ranks which contains information for
+ *                    re-allocating space in the file for every chunk
+ *                    modified. Then, each rank collectively re-allocates
+ *                    each chunk from the gathered array with their new
+ *                    sizes after the filter operation
+ *                 E. Proceed with the collective write operation for all
+ *                    the modified chunks
+ *                 F. Contribute the modified chunks to an array gathered
+ *                    by all ranks which contains information for
+ *                    re-inserting every chunk modified into the chunk
+ *                    index. Then, each rank collectively re-inserts each
  *                    chunk from the gathered array into the chunk index
  *
+ *              TODO: Note that steps D. and F. here are both collective
+ *                    operations that partially share data from the
+ *                    H5D_filtered_collective_io_info_t structure. To
+ *                    try to conserve on memory a bit, the distributed
+ *                    arrays these operations create are discarded after
+ *                    each operation is performed. If memory consumption
+ *                    here proves to not be an issue, the necessary data
+ *                    for both operations could be combined into a single
+ *                    structure so that only one collective MPI operation
+ *                    is needed to carry out both operations, rather than
+ *                    two.
  *
  * Return:      Non-negative on success/Negative on failure
  *
- * Programmer:  Jordan Henderson
- *              Friday, Nov. 4th, 2016
- *
  *-------------------------------------------------------------------------
  */
 static herr_t
 H5D__link_chunk_filtered_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
-                                       H5D_chunk_map_t *fm)
+                                       H5D_chunk_map_t *fm, int mpi_rank, int mpi_size)
 {
-    H5D_filtered_collective_io_info_t *chunk_list = NULL; /* The list of chunks being read/written */
-    H5D_filtered_collective_io_info_t *collective_chunk_list =
-        NULL;                /* The list of chunks used during collective operations */
-    H5D_storage_t ctg_store; /* Chunk storage information as contiguous dataset */
-    MPI_Datatype  mem_type             = MPI_BYTE;
-    MPI_Datatype  file_type            = MPI_BYTE;
-    hbool_t       mem_type_is_derived  = FALSE;
-    hbool_t       file_type_is_derived = FALSE;
-    size_t        chunk_list_num_entries;
-    size_t        collective_chunk_list_num_entries;
-    size_t *      num_chunks_selected_array = NULL; /* Array of number of chunks selected on each process */
-    size_t        i;                                /* Local index variable */
-    int           mpi_rank, mpi_size, mpi_code;
-    herr_t        ret_value = SUCCEED;
+    H5D_filtered_collective_io_info_t *chunk_list       = NULL; /* The list of chunks being read/written */
+    H5D_filtered_collective_io_info_t *chunk_hash_table = NULL;
+    unsigned char **                   chunk_msg_bufs   = NULL;
+    H5D_storage_t                      ctg_store; /* Chunk storage information as contiguous dataset */
+    MPI_Datatype                       mem_type                 = MPI_BYTE;
+    MPI_Datatype                       file_type                = MPI_BYTE;
+    hbool_t                            mem_type_is_derived      = FALSE;
+    hbool_t                            file_type_is_derived     = FALSE;
+    size_t *                           rank_chunks_assigned_map = NULL;
+    size_t                             chunk_list_num_entries;
+    size_t                             i;
+    int                                chunk_msg_bufs_len = 0;
+    int                                mpi_code;
+    herr_t                             ret_value = SUCCEED;
 
     FUNC_ENTER_STATIC
 
@@ -1358,11 +1713,12 @@ H5D__link_chunk_filtered_collective_io(H5D_io_info_t *io_info, const H5D_type_in
     HDassert(type_info);
     HDassert(fm);
 
-    /* Obtain the current rank of the process and the number of processes */
-    if ((mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file)) < 0)
-        HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi rank")
-    if ((mpi_size = H5F_mpi_get_size(io_info->dset->oloc.file)) < 0)
-        HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi size")
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TRACE_ENTER(mpi_rank);
+    H5D_MPIO_DEBUG_VA(mpi_rank, "Performing Linked-chunk I/O (%s) with MPI Comm size of %d",
+                      io_info->op_type == H5D_IO_OP_WRITE ? "write" : "read", mpi_size);
+    H5D_MPIO_TIME_START(mpi_rank, "Linked-chunk I/O");
+#endif
 
     /* Set the actual-chunk-opt-mode property. */
     H5CX_set_mpio_actual_chunk_opt(H5D_MPIO_LINK_CHUNK);
@@ -1373,123 +1729,127 @@ H5D__link_chunk_filtered_collective_io(H5D_io_info_t *io_info, const H5D_type_in
     H5CX_set_mpio_actual_io_mode(H5D_MPIO_CHUNK_COLLECTIVE);
 
     /* Build a list of selected chunks in the collective io operation */
-    if (H5D__construct_filtered_io_info_list(io_info, type_info, fm, &chunk_list, &chunk_list_num_entries) <
-        0)
+    if (H5D__mpio_collective_filtered_chunk_io_setup(io_info, type_info, fm, &chunk_list,
+                                                     &chunk_list_num_entries, mpi_rank) < 0)
         HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "couldn't construct filtered I/O info list")
 
-    if (io_info->op_type == H5D_IO_OP_WRITE) { /* Filtered collective write */
+    if (io_info->op_type == H5D_IO_OP_READ) { /* Filtered collective read */
+        if (H5D__mpio_collective_filtered_chunk_read(chunk_list, chunk_list_num_entries, io_info, type_info,
+                                                     mpi_rank, mpi_size) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't read filtered chunks")
+    }
+    else { /* Filtered collective write */
         H5D_chk_idx_info_t index_info;
-        H5D_chunk_ud_t     udata;
         hsize_t            mpi_buf_count;
 
-        /* Construct chunked index info */
-        index_info.f       = io_info->dset->oloc.file;
-        index_info.pline   = &(io_info->dset->shared->dcpl_cache.pline);
-        index_info.layout  = &(io_info->dset->shared->layout.u.chunk);
-        index_info.storage = &(io_info->dset->shared->layout.storage.u.chunk);
-
-        /* Set up chunk information for insertion to chunk index */
-        udata.common.layout  = index_info.layout;
-        udata.common.storage = index_info.storage;
-        udata.filter_mask    = 0;
-
-        /* Iterate through all the chunks in the collective write operation,
-         * updating each chunk with the data modifications from other processes,
-         * then re-filtering the chunk.
-         */
-        for (i = 0; i < chunk_list_num_entries; i++)
-            if (mpi_rank == chunk_list[i].owners.new_owner)
-                if (H5D__filtered_collective_chunk_entry_io(&chunk_list[i], io_info, type_info, fm) < 0)
-                    HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "couldn't process chunk entry")
+        H5D_MPIO_INIT_CHUNK_IDX_INFO(index_info, io_info);
 
-        /* Gather the new chunk sizes to all processes for a collective reallocation
-         * of the chunks in the file.
-         */
-        if (H5D__mpio_array_gatherv(chunk_list, chunk_list_num_entries,
-                                    sizeof(H5D_filtered_collective_io_info_t),
-                                    (void **)&collective_chunk_list, &collective_chunk_list_num_entries, true,
-                                    0, io_info->comm, NULL) < 0)
-            HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL, "couldn't gather new chunk sizes")
-
-        /* Collectively re-allocate the modified chunks (from each process) in the file */
-        for (i = 0; i < collective_chunk_list_num_entries; i++) {
-            hbool_t insert;
-
-            if (H5D__chunk_file_alloc(&index_info, &collective_chunk_list[i].chunk_states.chunk_current,
-                                      &collective_chunk_list[i].chunk_states.new_chunk, &insert,
-                                      collective_chunk_list[i].scaled) < 0)
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "unable to allocate chunk")
-        } /* end for */
+        if (mpi_size > 1) {
+            /* Redistribute shared chunks being written to */
+            if (H5D__mpio_redistribute_shared_chunks(chunk_list, chunk_list_num_entries, io_info, fm,
+                                                     mpi_rank, mpi_size, &rank_chunks_assigned_map) < 0)
+                HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "unable to redistribute shared chunks")
 
-        if (NULL == (num_chunks_selected_array = (size_t *)H5MM_malloc((size_t)mpi_size * sizeof(size_t))))
-            HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate num chunks selected array")
+            /* Send any chunk modification messages for chunks this rank no longer owns */
+            if (H5D__mpio_share_chunk_modification_data(chunk_list, &chunk_list_num_entries, io_info,
+                                                        type_info, mpi_rank, mpi_size, &chunk_hash_table,
+                                                        &chunk_msg_bufs, &chunk_msg_bufs_len) < 0)
+                HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL,
+                            "unable to send chunk modification data between MPI ranks")
 
-        if (MPI_SUCCESS !=
-            (mpi_code = MPI_Allgather(&chunk_list_num_entries, 1, MPI_UNSIGNED_LONG_LONG,
-                                      num_chunks_selected_array, 1, MPI_UNSIGNED_LONG_LONG, io_info->comm)))
-            HMPI_GOTO_ERROR(FAIL, "MPI_Allgather failed", mpi_code)
+            /* Make sure the local chunk list was updated correctly */
+            HDassert(chunk_list_num_entries == rank_chunks_assigned_map[mpi_rank]);
+        }
 
-        /* If this process has any chunks selected, create a MPI type for collectively
-         * writing out the chunks to file. Otherwise, the process contributes to the
+        /* Proceed to update all the chunks this rank owns with its own
+         * modification data and data from other ranks, before re-filtering
+         * the chunks. As chunk reads are done collectively here, all ranks
+         * must participate.
+         */
+        if (H5D__mpio_collective_filtered_chunk_update(chunk_list, chunk_list_num_entries, chunk_hash_table,
+                                                       chunk_msg_bufs, chunk_msg_bufs_len, io_info, type_info,
+                                                       mpi_rank, mpi_size) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "couldn't update modified chunks")
+
+        /* Free up resources used by chunk hash table now that we're done updating chunks */
+        HASH_CLEAR(hh, chunk_hash_table);
+
+        /* All ranks now collectively re-allocate file space for all chunks */
+        if (H5D__mpio_collective_filtered_chunk_reallocate(chunk_list, chunk_list_num_entries,
+                                                           rank_chunks_assigned_map, io_info, &index_info,
+                                                           mpi_rank, mpi_size) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL,
+                        "couldn't collectively re-allocate file space for chunks")
+
+        /* If this rank has any chunks selected, create a MPI type for collectively
+         * writing out the chunks to file. Otherwise, the rank contributes to the
          * collective write with a none type.
          */
-        if (chunk_list_num_entries) {
-            size_t offset;
-
-            /* During the collective re-allocation of chunks in the file, the record for each
-             * chunk is only updated in the collective array, not in the local copy of chunks on each
-             * process. However, each process needs the updated chunk records so that they can create
-             * a MPI type for the collective write that will write to the chunk's possible new locations
-             * in the file instead of the old ones. This ugly hack seems to be the best solution to
-             * copy the information back to the local array and avoid having to modify the collective
-             * write type function in an ugly way so that it will accept the collective array instead
-             * of the local array. This works correctly because the array gather function guarantees
-             * that the chunk data in the collective array is ordered in blocks by rank.
-             */
-            for (i = 0, offset = 0; i < (size_t)mpi_rank; i++)
-                offset += num_chunks_selected_array[i];
-
-            H5MM_memcpy(chunk_list, &collective_chunk_list[offset],
-                        num_chunks_selected_array[mpi_rank] * sizeof(H5D_filtered_collective_io_info_t));
+        if (H5D__mpio_collective_filtered_io_type(chunk_list, chunk_list_num_entries, io_info->op_type,
+                                                  &mem_type, &mem_type_is_derived, &file_type,
+                                                  &file_type_is_derived) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL,
+                        "couldn't create MPI type for writing filtered chunks")
 
-            /* Create single MPI type encompassing each selection in the dataspace */
-            if (H5D__mpio_filtered_collective_write_type(chunk_list, chunk_list_num_entries, &mem_type,
-                                                         &mem_type_is_derived, &file_type,
-                                                         &file_type_is_derived) < 0)
-                HGOTO_ERROR(H5E_DATASET, H5E_BADTYPE, FAIL, "couldn't create MPI link chunk I/O type")
+        mpi_buf_count = (file_type_is_derived || mem_type_is_derived) ? 1 : 0;
 
-            /* Override the write buffer to point to the address of the first
-             * chunk data buffer
+        /* Setup contig storage info for I/O operation */
+        if (chunk_list_num_entries) {
+            /*
+             * Override the write buffer to point to the first
+             * chunk's data buffer
              */
             io_info->u.wbuf = chunk_list[0].buf;
-        } /* end if */
 
-        /* We have a single, complicated MPI datatype for both memory & file */
-        mpi_buf_count = (mem_type_is_derived && file_type_is_derived) ? (hsize_t)1 : (hsize_t)0;
-
-        /* Set up the base storage address for this operation */
-        ctg_store.contig.dset_addr = 0; /* Write address must be set to address 0 */
-        io_info->store             = &ctg_store;
+            /*
+             * Setup the base storage address for this operation
+             * to be the first chunk's file address
+             */
+            ctg_store.contig.dset_addr = chunk_list[0].chunk_new.offset;
+        }
+        else
+            ctg_store.contig.dset_addr = 0;
 
         /* Perform I/O */
+        io_info->store = &ctg_store;
         if (H5D__final_collective_io(io_info, type_info, mpi_buf_count, file_type, mem_type) < 0)
             HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "couldn't finish MPI-IO")
 
+        /* Free up resources in anticipation of following collective operation */
+        for (i = 0; i < chunk_list_num_entries; i++) {
+            if (chunk_list[i].buf) {
+                H5MM_free(chunk_list[i].buf);
+                chunk_list[i].buf = NULL;
+            }
+        }
+
         /* Participate in the collective re-insertion of all chunks modified
-         * in this iteration into the chunk index
+         * into the chunk index
          */
-        for (i = 0; i < collective_chunk_list_num_entries; i++) {
-            udata.chunk_block   = collective_chunk_list[i].chunk_states.new_chunk;
-            udata.common.scaled = collective_chunk_list[i].scaled;
-            udata.chunk_idx     = collective_chunk_list[i].index;
-
-            if ((index_info.storage->ops->insert)(&index_info, &udata, io_info->dset) < 0)
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTINSERT, FAIL, "unable to insert chunk address into index")
-        } /* end for */
-    }     /* end if */
+        if (H5D__mpio_collective_filtered_chunk_reinsert(chunk_list, chunk_list_num_entries,
+                                                         rank_chunks_assigned_map, io_info, &index_info,
+                                                         mpi_rank, mpi_size) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL,
+                        "couldn't collectively re-insert modified chunks into chunk index")
+    }
 
 done:
-    /* Free resources used by a process which had some selection */
+    /* Free the MPI buf and file types, if they were derived */
+    if (mem_type_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&mem_type)))
+        HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+    if (file_type_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&file_type)))
+        HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+
+    if (chunk_msg_bufs) {
+        for (i = 0; i < (size_t)chunk_msg_bufs_len; i++)
+            H5MM_free(chunk_msg_bufs[i]);
+
+        H5MM_free(chunk_msg_bufs);
+    }
+
+    HASH_CLEAR(hh, chunk_hash_table);
+
+    /* Free resources used by a rank which had some selection */
     if (chunk_list) {
         for (i = 0; i < chunk_list_num_entries; i++)
             if (chunk_list[i].buf)
@@ -1498,16 +1858,13 @@ done:
         H5MM_free(chunk_list);
     } /* end if */
 
-    if (num_chunks_selected_array)
-        H5MM_free(num_chunks_selected_array);
-    if (collective_chunk_list)
-        H5MM_free(collective_chunk_list);
+    if (rank_chunks_assigned_map)
+        H5MM_free(rank_chunks_assigned_map);
 
-    /* Free the MPI buf and file types, if they were derived */
-    if (mem_type_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&mem_type)))
-        HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
-    if (file_type_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&file_type)))
-        HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TIME_STOP(mpi_rank);
+    H5D_MPIO_TRACE_EXIT(mpi_rank);
+#endif
 
     FUNC_LEAVE_NOAPI(ret_value)
 } /* end H5D__link_chunk_filtered_collective_io() */
@@ -1530,7 +1887,8 @@ done:
  *-------------------------------------------------------------------------
  */
 static herr_t
-H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, H5D_chunk_map_t *fm)
+H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, H5D_chunk_map_t *fm,
+                               int mpi_rank, int mpi_size)
 {
     H5D_io_info_t              ctg_io_info; /* Contiguous I/O info object */
     H5D_storage_t              ctg_store;   /* Chunk storage information as contiguous dataset */
@@ -1543,11 +1901,8 @@ H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *ty
     H5FD_mpio_collective_opt_t last_coll_opt_mode =
         H5FD_MPIO_COLLECTIVE_IO; /* Last parallel transfer with independent IO or collective IO with this mode
                                   */
-    size_t total_chunk;          /* Total # of chunks in dataset */
-#ifdef H5Dmpio_DEBUG
-    int mpi_rank;
-#endif
-    size_t                    u; /* Local index variable */
+    size_t                    total_chunk; /* Total # of chunks in dataset */
+    size_t                    u;           /* Local index variable */
     H5D_mpio_actual_io_mode_t actual_io_mode =
         H5D_MPIO_NO_COLLECTIVE; /* Local variable for tracking the I/O mode used. */
     herr_t ret_value = SUCCEED;
@@ -1557,10 +1912,6 @@ H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *ty
     /* Set the actual chunk opt mode property */
     H5CX_set_mpio_actual_chunk_opt(H5D_MPIO_MULTI_CHUNK);
 
-#ifdef H5Dmpio_DEBUG
-    mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file);
-#endif
-
     /* Retrieve total # of chunks in dataset */
     H5_CHECKED_ASSIGN(total_chunk, size_t, fm->layout->u.chunk.nchunks, hsize_t);
     HDassert(total_chunk != 0);
@@ -1568,13 +1919,13 @@ H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *ty
     /* Allocate memories */
     chunk_io_option = (uint8_t *)H5MM_calloc(total_chunk);
     chunk_addr      = (haddr_t *)H5MM_calloc(total_chunk * sizeof(haddr_t));
-#ifdef H5D_DEBUG
-    if (H5DEBUG(D))
-        HDfprintf(H5DEBUG(D), "total_chunk %zu\n", total_chunk);
+
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_DEBUG_VA(mpi_rank, "total_chunk %zu", total_chunk);
 #endif
 
     /* Obtain IO option for each chunk */
-    if (H5D__obtain_mpio_mode(io_info, fm, chunk_io_option, chunk_addr) < 0)
+    if (H5D__obtain_mpio_mode(io_info, fm, chunk_io_option, chunk_addr, mpi_rank, mpi_size) < 0)
         HGOTO_ERROR(H5E_DATASET, H5E_CANTRECV, FAIL, "unable to obtain MPIO mode")
 
     /* Set up contiguous I/O info object */
@@ -1602,9 +1953,8 @@ H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *ty
         H5S_t *           fspace;     /* Dataspace describing chunk & selection in it */
         H5S_t *           mspace; /* Dataspace describing selection in memory corresponding to this chunk */
 
-#ifdef H5D_DEBUG
-        if (H5DEBUG(D))
-            HDfprintf(H5DEBUG(D), "mpi_rank = %d, chunk index = %zu\n", mpi_rank, u);
+#ifdef H5Dmpio_DEBUG
+        H5D_MPIO_DEBUG_VA(mpi_rank, "mpi_rank = %d, chunk index = %zu", mpi_rank, u);
 #endif
         /* Get the chunk info for this chunk, if there are elements selected */
         chunk_info = fm->select_chunk[u];
@@ -1622,10 +1972,9 @@ H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *ty
          *      needs to contribute MPI NONE TYPE.
          */
         if (chunk_io_option[u] == H5D_CHUNK_IO_MODE_COL) {
-#ifdef H5D_DEBUG
-            if (H5DEBUG(D))
-                HDfprintf(H5DEBUG(D), "inside collective chunk IO mpi_rank = %d, chunk index = %zu\n",
-                          mpi_rank, u);
+#ifdef H5Dmpio_DEBUG
+            H5D_MPIO_DEBUG_VA(mpi_rank, "inside collective chunk IO mpi_rank = %d, chunk index = %zu",
+                              mpi_rank, u);
 #endif
 
             /* Set the file & memory dataspaces */
@@ -1661,10 +2010,9 @@ H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *ty
                 HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "couldn't finish shared collective MPI-IO")
         }      /* end if */
         else { /* possible independent IO for this chunk */
-#ifdef H5D_DEBUG
-            if (H5DEBUG(D))
-                HDfprintf(H5DEBUG(D), "inside independent IO mpi_rank = %d, chunk index = %zu\n", mpi_rank,
-                          u);
+#ifdef H5Dmpio_DEBUG
+            H5D_MPIO_DEBUG_VA(mpi_rank, "inside independent IO mpi_rank = %d, chunk index = %zu", mpi_rank,
+                              u);
 #endif
 
             HDassert(chunk_io_option[u] == 0);
@@ -1694,9 +2042,8 @@ H5D__multi_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *ty
             /* Perform the I/O */
             if (H5D__inter_collective_io(&ctg_io_info, type_info, fspace, mspace) < 0)
                 HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "couldn't finish shared collective MPI-IO")
-#ifdef H5D_DEBUG
-            if (H5DEBUG(D))
-                HDfprintf(H5DEBUG(D), "after inter collective IO\n");
+#ifdef H5Dmpio_DEBUG
+            H5D_MPIO_DEBUG(mpi_rank, "after inter collective IO");
 #endif
         } /* end else */
     }     /* end for */
@@ -1716,80 +2063,101 @@ done:
 /*-------------------------------------------------------------------------
  * Function:    H5D__multi_chunk_filtered_collective_io
  *
- * Purpose:     To do filtered collective IO iteratively to save on memory.
- *              While link_chunk_filtered_collective_io will construct and
- *              work on a list of all of the chunks selected in the IO
- *              operation at once, this function works iteratively on a set
- *              of chunks at a time; at most one chunk per rank per
- *              iteration.
- *
- *              1. Construct a list of selected chunks in the collective IO
- *                 operation
- *                 A. If any chunk is being written to by more than 1
- *                    process, the process writing to the chunk which
- *                    currently has the least amount of chunks assigned
- *                    to it becomes the new owner (in the case of ties,
- *                    the lowest MPI rank becomes the new owner)
- *              2. If the operation is a read operation
- *                 A. Loop through each chunk in the operation
- *                    I. Read the chunk from the file
- *                    II. Unfilter the chunk
- *                    III. Scatter the read chunk data to the user's buffer
- *              3. If the operation is a write operation
- *                 A. Loop through each chunk in the operation
- *                    I. If this is not a full overwrite of the chunk
- *                       a) Read the chunk from file and pass the chunk
- *                          through the filter pipeline in reverse order
- *                          (Unfilter the chunk)
- *                    II. Update the chunk data with the modifications from
- *                        the owning process
- *                    III. Receive any modification data from other
- *                         processes and update the chunk data with these
- *                         modifications
- *                    IV. Filter the chunk
- *                    V. Contribute the chunk to an array gathered by
- *                        all processes which contains every chunk
- *                        modified in this iteration (up to one chunk
- *                        per process, some processes may not have a
- *                        selection/may have less chunks to work on than
- *                        other processes)
- *                    VI. All processes collectively re-allocate each
- *                        chunk from the gathered array with their new
- *                        sizes after the filter operation
- *                    VII. Proceed with the collective write operation
- *                        for the chunks modified on this iteration
- *                    VIII. All processes collectively re-insert each
- *                       chunk from the gathered array into the chunk
- *                       index
+ * Purpose:     Performs collective I/O on filtered chunks iteratively to
+ *              save on memory and potentially get better performance
+ *              depending on the average number of chunks per rank. While
+ *              linked-chunk I/O will construct and work on a list of all
+ *              of the chunks selected in the I/O operation at once, this
+ *              function works iteratively on a set of chunks at a time; at
+ *              most one chunk per rank per iteration.  The general
+ *              algorithm is as follows:
+ *
+ *              1. Construct a list of selected chunks in the collective
+ *                 I/O operation
+ *              2. If the operation is a read operation, loop a number of
+ *                 times equal to the maximum number of chunks selected on
+ *                 any particular rank and on each iteration:
+ *                 A. Participate in a collective read of chunks from
+ *                    the file (ranks that run out of chunks still need
+ *                    to participate)
+ *                 B. Unfilter the chunk that was read (if any)
+ *                 C. Scatter the read chunk's data to the application's
+ *                    read buffer
+ *              3. If the operation is a write operation, redistribute any
+ *                 chunks being written to by more than 1 MPI rank, such
+ *                 that the chunk is only owned by 1 MPI rank. Of the ranks
+ *                 writing to the chunk, the one which currently has the
+ *                 fewest chunks assigned to it becomes the new owner
+ *                 (in the case of ties, the lowest MPI rank becomes the
+ *                 new owner). Then, loop a number of times equal to the
+ *                 maximum number of chunks selected on any particular
+ *                 rank and on each iteration:
+ *                 A. Participate in a collective read of chunks from
+ *                    the file (ranks that run out of chunks still need
+ *                    to participate)
+ *                    I. If we actually read a chunk from the file (if
+ *                       a chunk is being fully overwritten, we skip
+ *                       reading it), pass the chunk through the filter
+ *                       pipeline in reverse order (unfilter the chunk)
+ *                 B. Update the chunk data with the modifications from
+ *                    the owning rank
+ *                 C. Receive any modification data from other ranks and
+ *                    update the chunk data with those modifications
+ *                 D. Filter the chunk
+ *                 E. Contribute the chunk to an array gathered by
+ *                    all ranks which contains information for
+ *                    re-allocating space in the file for every chunk
+ *                    modified in this iteration (up to one chunk per
+ *                    rank; some ranks may not have a selection/may have
+ *                    fewer chunks to work on than other ranks). Then,
+ *                    all ranks collectively re-allocate each chunk
+ *                    from the gathered array with its new size
+ *                    after the filter operation
+ *                 F. Proceed with the collective write operation
+ *                    for the chunks modified on this iteration
+ *                 G. Contribute the chunk to an array gathered by
+ *                    all ranks which contains information for
+ *                    re-inserting every chunk modified on this
+ *                    iteration into the chunk index. Then, all ranks
+ *                    collectively re-insert each chunk from the
+ *                    gathered array into the chunk index
+ *
+ *              TODO: Note that steps E. and G. here are both collective
+ *                    operations that partially share data from the
+ *                    H5D_filtered_collective_io_info_t structure. To
+ *                    conserve some memory, the distributed
+ *                    arrays these operations create are discarded after
+ *                    each operation is performed. If memory consumption
+ *                    here proves not to be an issue, the necessary data
+ *                    for both operations could be combined into a single
+ *                    structure so that only one collective MPI operation
+ *                    is needed to carry out both operations, rather than
+ *                    two.
  *
  * Return:      Non-negative on success/Negative on failure
  *
- * Programmer:  Jordan Henderson
- *              Friday, Dec. 2nd, 2016
- *
  *-------------------------------------------------------------------------
  */
 static herr_t
 H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
-                                        H5D_chunk_map_t *fm)
+                                        H5D_chunk_map_t *fm, int mpi_rank, int mpi_size)
 {
-    H5D_filtered_collective_io_info_t *chunk_list = NULL; /* The list of chunks being read/written */
-    H5D_filtered_collective_io_info_t *collective_chunk_list =
-        NULL;                  /* The list of chunks used during collective operations */
-    H5D_storage_t store;       /* union of EFL and chunk pointer in file space */
-    H5D_io_info_t ctg_io_info; /* Contiguous I/O info object */
-    H5D_storage_t ctg_store;   /* Chunk storage information as contiguous dataset */
-    MPI_Datatype *file_type_array            = NULL;
-    MPI_Datatype *mem_type_array             = NULL;
-    hbool_t *     file_type_is_derived_array = NULL;
-    hbool_t *     mem_type_is_derived_array  = NULL;
-    hbool_t *     has_chunk_selected_array =
-        NULL; /* Array of whether or not each process is contributing a chunk to each iteration */
-    size_t chunk_list_num_entries;
-    size_t collective_chunk_list_num_entries;
-    size_t i, j; /* Local index variable */
-    int    mpi_rank, mpi_size, mpi_code;
-    herr_t ret_value = SUCCEED;
+    H5D_filtered_collective_io_info_t *chunk_list       = NULL; /* The list of chunks being read/written */
+    H5D_filtered_collective_io_info_t *chunk_hash_table = NULL;
+    unsigned char **                   chunk_msg_bufs   = NULL;
+    H5D_io_info_t                      ctg_io_info; /* Contiguous I/O info object */
+    H5D_storage_t                      ctg_store;   /* Chunk storage information as contiguous dataset */
+    MPI_Datatype                       mem_type             = MPI_BYTE;
+    MPI_Datatype                       file_type            = MPI_BYTE;
+    hbool_t                            mem_type_is_derived  = FALSE;
+    hbool_t                            file_type_is_derived = FALSE;
+    hbool_t                            have_chunk_to_process;
+    size_t                             chunk_list_num_entries;
+    size_t                             i;
+    size_t                             max_num_chunks;
+    int                                chunk_msg_bufs_len = 0;
+    int                                mpi_code;
+    herr_t                             ret_value = SUCCEED;
 
     FUNC_ENTER_STATIC
 
@@ -1797,11 +2165,12 @@ H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, const H5D_type_i
     HDassert(type_info);
     HDassert(fm);
 
-    /* Obtain the current rank of the process and the number of processes */
-    if ((mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file)) < 0)
-        HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi rank")
-    if ((mpi_size = H5F_mpi_get_size(io_info->dset->oloc.file)) < 0)
-        HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi size")
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TRACE_ENTER(mpi_rank);
+    H5D_MPIO_DEBUG_VA(mpi_rank, "Performing Multi-chunk I/O (%s) with MPI Comm size of %d",
+                      io_info->op_type == H5D_IO_OP_WRITE ? "write" : "read", mpi_size);
+    H5D_MPIO_TIME_START(mpi_rank, "Multi-chunk I/O");
+#endif
 
     /* Set the actual chunk opt mode property */
     H5CX_set_mpio_actual_chunk_opt(H5D_MPIO_MULTI_CHUNK);
@@ -1812,10 +2181,19 @@ H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, const H5D_type_i
     H5CX_set_mpio_actual_io_mode(H5D_MPIO_CHUNK_COLLECTIVE);
 
     /* Build a list of selected chunks in the collective IO operation */
-    if (H5D__construct_filtered_io_info_list(io_info, type_info, fm, &chunk_list, &chunk_list_num_entries) <
-        0)
+    if (H5D__mpio_collective_filtered_chunk_io_setup(io_info, type_info, fm, &chunk_list,
+                                                     &chunk_list_num_entries, mpi_rank) < 0)
         HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "couldn't construct filtered I/O info list")
 
+    /* Retrieve the maximum number of chunks selected for any rank */
+    if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&chunk_list_num_entries, &max_num_chunks, 1,
+                                                 MPI_UNSIGNED_LONG_LONG, MPI_MAX, io_info->comm)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code)
+
+    /* If no one has anything selected at all, end the operation */
+    if (0 == max_num_chunks)
+        HGOTO_DONE(SUCCEED);
+
     /* Set up contiguous I/O info object */
     H5MM_memcpy(&ctg_io_info, io_info, sizeof(ctg_io_info));
     ctg_io_info.store      = &ctg_store;
@@ -1823,190 +2201,147 @@ H5D__multi_chunk_filtered_collective_io(H5D_io_info_t *io_info, const H5D_type_i
 
     /* Initialize temporary contiguous storage info */
     ctg_store.contig.dset_size = (hsize_t)io_info->dset->shared->layout.u.chunk.size;
-    ctg_store.contig.dset_addr = 0;
-
-    /* Set dataset storage for I/O info */
-    io_info->store = &store;
 
     if (io_info->op_type == H5D_IO_OP_READ) { /* Filtered collective read */
-        for (i = 0; i < chunk_list_num_entries; i++)
-            if (H5D__filtered_collective_chunk_entry_io(&chunk_list[i], io_info, type_info, fm) < 0)
-                HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't process chunk entry")
-    }      /* end if */
+        for (i = 0; i < max_num_chunks; i++) {
+            /* Check if this rank has a chunk to work on for this iteration */
+            have_chunk_to_process = (i < chunk_list_num_entries);
+
+            if (H5D__mpio_collective_filtered_chunk_read(have_chunk_to_process ? &chunk_list[i] : NULL,
+                                                         have_chunk_to_process ? 1 : 0, io_info, type_info,
+                                                         mpi_rank, mpi_size) < 0)
+                HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't read filtered chunks")
+
+            if (have_chunk_to_process && chunk_list[i].buf) {
+                H5MM_free(chunk_list[i].buf);
+                chunk_list[i].buf = NULL;
+            }
+        }
+    }
     else { /* Filtered collective write */
         H5D_chk_idx_info_t index_info;
-        H5D_chunk_ud_t     udata;
-        size_t             max_num_chunks;
         hsize_t            mpi_buf_count;
 
         /* Construct chunked index info */
-        index_info.f       = io_info->dset->oloc.file;
-        index_info.pline   = &(io_info->dset->shared->dcpl_cache.pline);
-        index_info.layout  = &(io_info->dset->shared->layout.u.chunk);
-        index_info.storage = &(io_info->dset->shared->layout.storage.u.chunk);
-
-        /* Set up chunk information for insertion to chunk index */
-        udata.common.layout  = index_info.layout;
-        udata.common.storage = index_info.storage;
-        udata.filter_mask    = 0;
-
-        /* Retrieve the maximum number of chunks being written among all processes */
-        if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&chunk_list_num_entries, &max_num_chunks, 1,
-                                                     MPI_UNSIGNED_LONG_LONG, MPI_MAX, io_info->comm)))
-            HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code)
-
-        /* If no one is writing anything at all, end the operation */
-        if (!(max_num_chunks > 0))
-            HGOTO_DONE(SUCCEED);
-
-        /* Allocate arrays for storing MPI file and mem types and whether or not the
-         * types were derived.
-         */
-        if (NULL == (file_type_array = (MPI_Datatype *)H5MM_malloc(max_num_chunks * sizeof(MPI_Datatype))))
-            HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate file type array")
-
-        if (NULL == (file_type_is_derived_array = (hbool_t *)H5MM_calloc(max_num_chunks * sizeof(hbool_t))))
-            HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate file type is derived array")
-
-        if (NULL == (mem_type_array = (MPI_Datatype *)H5MM_malloc(max_num_chunks * sizeof(MPI_Datatype))))
-            HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate mem type array")
-
-        if (NULL == (mem_type_is_derived_array = (hbool_t *)H5MM_calloc(max_num_chunks * sizeof(hbool_t))))
-            HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate mem type is derived array")
+        H5D_MPIO_INIT_CHUNK_IDX_INFO(index_info, io_info);
+
+        if (mpi_size > 1) {
+            /* Redistribute shared chunks being written to */
+            if (H5D__mpio_redistribute_shared_chunks(chunk_list, chunk_list_num_entries, io_info, fm,
+                                                     mpi_rank, mpi_size, NULL) < 0)
+                HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "unable to redistribute shared chunks")
+
+            /* Send any chunk modification messages for chunks this rank no longer owns */
+            if (H5D__mpio_share_chunk_modification_data(chunk_list, &chunk_list_num_entries, io_info,
+                                                        type_info, mpi_rank, mpi_size, &chunk_hash_table,
+                                                        &chunk_msg_bufs, &chunk_msg_bufs_len) < 0)
+                HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL,
+                            "unable to send chunk modification data between MPI ranks")
+        }
 
-        /* Iterate over the max number of chunks among all processes, as this process could
-         * have no chunks left to work on, but it still needs to participate in the collective
-         * re-allocation and re-insertion of chunks modified by other processes.
+        /* Iterate over the max number of chunks among all ranks, as this rank could
+         * have no chunks left to work on, but it still needs to participate in the
+         * collective re-allocation and re-insertion of chunks modified by other ranks.
          */
         for (i = 0; i < max_num_chunks; i++) {
-            /* Check if this process has a chunk to work on for this iteration */
-            hbool_t have_chunk_to_process =
-                (i < chunk_list_num_entries) && (mpi_rank == chunk_list[i].owners.new_owner);
+            /* Check if this rank has a chunk to work on for this iteration */
+            have_chunk_to_process = (i < chunk_list_num_entries) && (mpi_rank == chunk_list[i].new_owner);
 
-            if (have_chunk_to_process)
-                if (H5D__filtered_collective_chunk_entry_io(&chunk_list[i], io_info, type_info, fm) < 0)
-                    HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "couldn't process chunk entry")
-
-            /* Gather the new chunk sizes to all processes for a collective re-allocation
-             * of the chunks in the file
-             */
-            if (H5D__mpio_array_gatherv(&chunk_list[i], have_chunk_to_process ? 1 : 0,
-                                        sizeof(H5D_filtered_collective_io_info_t),
-                                        (void **)&collective_chunk_list, &collective_chunk_list_num_entries,
-                                        true, 0, io_info->comm, NULL) < 0)
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL, "couldn't gather new chunk sizes")
-
-            /* Participate in the collective re-allocation of all chunks modified
-             * in this iteration.
+            /* Proceed to update the chunk this rank owns (if any left) with its
+             * own modification data and data from other ranks, before re-filtering
+             * the chunks. As chunk reads are done collectively here, all ranks
+             * must participate.
              */
-            for (j = 0; j < collective_chunk_list_num_entries; j++) {
-                hbool_t insert = FALSE;
-
-                if (H5D__chunk_file_alloc(&index_info, &collective_chunk_list[j].chunk_states.chunk_current,
-                                          &collective_chunk_list[j].chunk_states.new_chunk, &insert,
-                                          chunk_list[j].scaled) < 0)
-                    HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "unable to allocate chunk")
-            } /* end for */
+            if (H5D__mpio_collective_filtered_chunk_update(have_chunk_to_process ? &chunk_list[i] : NULL,
+                                                           have_chunk_to_process ? 1 : 0, chunk_hash_table,
+                                                           chunk_msg_bufs, chunk_msg_bufs_len, io_info,
+                                                           type_info, mpi_rank, mpi_size) < 0)
+                HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "couldn't update modified chunks")
+
+            /* All ranks now collectively re-allocate file space for all chunks */
+            if (H5D__mpio_collective_filtered_chunk_reallocate(have_chunk_to_process ? &chunk_list[i] : NULL,
+                                                               have_chunk_to_process ? 1 : 0, NULL, io_info,
+                                                               &index_info, mpi_rank, mpi_size) < 0)
+                HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL,
+                            "couldn't collectively re-allocate file space for chunks")
 
-            if (NULL ==
-                (has_chunk_selected_array = (hbool_t *)H5MM_malloc((size_t)mpi_size * sizeof(hbool_t))))
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate num chunks selected array")
-
-            if (MPI_SUCCESS !=
-                (mpi_code = MPI_Allgather(&have_chunk_to_process, 1, MPI_C_BOOL, has_chunk_selected_array, 1,
-                                          MPI_C_BOOL, io_info->comm)))
-                HMPI_GOTO_ERROR(FAIL, "MPI_Allgather failed", mpi_code)
-
-            /* If this process has a chunk to work on, create a MPI type for the
-             * memory and file for writing out the chunk
+            /*
+             * If this rank has a chunk to work on, create a MPI type
+             * for writing out the chunk. Otherwise, the rank will
+             * use MPI_BYTE for the file and memory type and specify
+             * a count of 0.
              */
-            if (have_chunk_to_process) {
-                size_t offset;
-                int    mpi_type_count;
-
-                for (j = 0, offset = 0; j < (size_t)mpi_rank; j++)
-                    offset += has_chunk_selected_array[j];
-
-                /* Collect the new chunk info back to the local copy, since only the record in the
-                 * collective array gets updated by the chunk re-allocation */
-                H5MM_memcpy(&chunk_list[i].chunk_states.new_chunk,
-                            &collective_chunk_list[offset].chunk_states.new_chunk,
-                            sizeof(chunk_list[i].chunk_states.new_chunk));
-
-                H5_CHECKED_ASSIGN(mpi_type_count, int, chunk_list[i].chunk_states.new_chunk.length, hsize_t);
-
-                /* Create MPI memory type for writing to chunk */
-                if (MPI_SUCCESS !=
-                    (mpi_code = MPI_Type_contiguous(mpi_type_count, MPI_BYTE, &mem_type_array[i])))
-                    HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
-                if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(&mem_type_array[i])))
-                    HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
-                mem_type_is_derived_array[i] = TRUE;
-
-                /* Create MPI file type for writing to chunk */
-                if (MPI_SUCCESS !=
-                    (mpi_code = MPI_Type_contiguous(mpi_type_count, MPI_BYTE, &file_type_array[i])))
-                    HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
-                if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(&file_type_array[i])))
-                    HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
-                file_type_is_derived_array[i] = TRUE;
-
-                mpi_buf_count = 1;
+            if (H5D__mpio_collective_filtered_io_type(
+                    have_chunk_to_process ? &chunk_list[i] : NULL, have_chunk_to_process ? 1 : 0,
+                    io_info->op_type, &mem_type, &mem_type_is_derived, &file_type, &file_type_is_derived) < 0)
+                HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL,
+                            "couldn't create MPI type for writing filtered chunks")
 
-                /* Set up the base storage address for this operation */
-                ctg_store.contig.dset_addr = chunk_list[i].chunk_states.new_chunk.offset;
+            mpi_buf_count = (file_type_is_derived || mem_type_is_derived) ? 1 : 0;
 
-                /* Override the write buffer to point to the address of the
-                 * chunk data buffer
+            /* Override the write buffer to point to the chunk data buffer */
+            if (have_chunk_to_process) {
+                /*
+                 * Override the write buffer to point to the
+                 * chunk's data buffer
                  */
                 ctg_io_info.u.wbuf = chunk_list[i].buf;
-            } /* end if */
-            else {
-                mem_type_array[i] = file_type_array[i] = MPI_BYTE;
-                mpi_buf_count                          = 0;
-            } /* end else */
+
+                /*
+                 * Setup the base storage address for this
+                 * operation to be the chunk's file address
+                 */
+                ctg_store.contig.dset_addr = chunk_list[i].chunk_new.offset;
+            }
+            else
+                ctg_store.contig.dset_addr = 0;
 
             /* Perform the I/O */
-            if (H5D__final_collective_io(&ctg_io_info, type_info, mpi_buf_count, file_type_array[i],
-                                         mem_type_array[i]) < 0)
+            if (H5D__final_collective_io(&ctg_io_info, type_info, mpi_buf_count, file_type, mem_type) < 0)
                 HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "couldn't finish MPI-IO")
 
+            /* Free up resources in anticipation of following collective operation */
+            if (have_chunk_to_process && chunk_list[i].buf) {
+                H5MM_free(chunk_list[i].buf);
+                chunk_list[i].buf = NULL;
+            }
+
             /* Participate in the collective re-insertion of all chunks modified
              * in this iteration into the chunk index
              */
-            for (j = 0; j < collective_chunk_list_num_entries; j++) {
-                udata.chunk_block   = collective_chunk_list[j].chunk_states.new_chunk;
-                udata.common.scaled = collective_chunk_list[j].scaled;
-                udata.chunk_idx     = collective_chunk_list[j].index;
-
-                if ((index_info.storage->ops->insert)(&index_info, &udata, io_info->dset) < 0)
-                    HGOTO_ERROR(H5E_DATASET, H5E_CANTINSERT, FAIL,
-                                "unable to insert chunk address into index")
-            } /* end for */
+            if (H5D__mpio_collective_filtered_chunk_reinsert(have_chunk_to_process ? &chunk_list[i] : NULL,
+                                                             have_chunk_to_process ? 1 : 0, NULL, io_info,
+                                                             &index_info, mpi_rank, mpi_size) < 0)
+                HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL,
+                            "couldn't collectively re-insert modified chunks into chunk index")
+
+            /* Free the MPI types, if they were derived */
+            if (mem_type_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&mem_type)))
+                HMPI_GOTO_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+            mem_type_is_derived = FALSE;
+            if (file_type_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&file_type)))
+                HMPI_GOTO_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+            file_type_is_derived = FALSE;
+        } /* end for */
+    }
 
-            if (collective_chunk_list) {
-                H5MM_free(collective_chunk_list);
-                collective_chunk_list = NULL;
-            } /* end if */
-            if (has_chunk_selected_array) {
-                H5MM_free(has_chunk_selected_array);
-                has_chunk_selected_array = NULL;
-            } /* end if */
-        }     /* end for */
+done:
+    /* Free the MPI buf and file types, if they were derived */
+    if (mem_type_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&mem_type)))
+        HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+    if (file_type_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&file_type)))
+        HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
 
-        /* Free the MPI file and memory types, if they were derived */
-        for (i = 0; i < max_num_chunks; i++) {
-            if (file_type_is_derived_array[i])
-                if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&file_type_array[i])))
-                    HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+    if (chunk_msg_bufs) {
+        for (i = 0; i < (size_t)chunk_msg_bufs_len; i++)
+            H5MM_free(chunk_msg_bufs[i]);
 
-            if (mem_type_is_derived_array[i])
-                if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&mem_type_array[i])))
-                    HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
-        } /* end for */
-    }     /* end else */
+        H5MM_free(chunk_msg_bufs);
+    }
 
-done:
+    HASH_CLEAR(hh, chunk_hash_table);
+
+    /* Free resources used by a rank which had some selection */
     if (chunk_list) {
         for (i = 0; i < chunk_list_num_entries; i++)
             if (chunk_list[i].buf)
@@ -2015,16 +2350,10 @@ done:
         H5MM_free(chunk_list);
     } /* end if */
 
-    if (collective_chunk_list)
-        H5MM_free(collective_chunk_list);
-    if (file_type_array)
-        H5MM_free(file_type_array);
-    if (mem_type_array)
-        H5MM_free(mem_type_array);
-    if (file_type_is_derived_array)
-        H5MM_free(file_type_is_derived_array);
-    if (mem_type_is_derived_array)
-        H5MM_free(mem_type_is_derived_array);
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TIME_STOP(mpi_rank);
+    H5D_MPIO_TRACE_EXIT(mpi_rank);
+#endif
 
     FUNC_LEAVE_NOAPI(ret_value)
 } /* end H5D__multi_chunk_filtered_collective_io() */
@@ -2050,11 +2379,22 @@ H5D__inter_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf
     hbool_t      mbt_is_derived = FALSE;
     hbool_t      mft_is_derived = FALSE;
     MPI_Datatype mpi_file_type, mpi_buf_type;
-    int          mpi_code;            /* MPI return code */
-    herr_t       ret_value = SUCCEED; /* return value */
+    int          mpi_code; /* MPI return code */
+#ifdef H5Dmpio_DEBUG
+    int mpi_rank;
+#endif
+    herr_t ret_value = SUCCEED; /* return value */
 
     FUNC_ENTER_STATIC
 
+#ifdef H5Dmpio_DEBUG
+    mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file);
+    H5D_MPIO_TRACE_ENTER(mpi_rank);
+    H5D_MPIO_TIME_START(mpi_rank, "Inter collective I/O");
+    if (mpi_rank < 0)
+        HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain MPI rank")
+#endif
+
     if ((file_space != NULL) && (mem_space != NULL)) {
         int      mpi_file_count;     /* Number of file "objects" to transfer */
         hsize_t *permute_map = NULL; /* array that holds the mapping from the old,
@@ -2113,9 +2453,8 @@ H5D__inter_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf
         mft_is_derived = FALSE;
     } /* end else */
 
-#ifdef H5D_DEBUG
-    if (H5DEBUG(D))
-        HDfprintf(H5DEBUG(D), "before final collective IO \n");
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_DEBUG(mpi_rank, "before final collective I/O");
 #endif
 
     /* Perform final collective I/O operation */
@@ -2129,9 +2468,10 @@ done:
     if (mft_is_derived && MPI_SUCCESS != (mpi_code = MPI_Type_free(&mpi_file_type)))
         HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
 
-#ifdef H5D_DEBUG
-    if (H5DEBUG(D))
-        HDfprintf(H5DEBUG(D), "before leaving inter_collective_io ret_value = %d\n", ret_value);
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TIME_STOP(mpi_rank);
+    H5D_MPIO_DEBUG_VA(mpi_rank, "before leaving inter_collective_io ret_value = %d", ret_value);
+    H5D_MPIO_TRACE_EXIT(mpi_rank);
 #endif
 
     FUNC_LEAVE_NOAPI(ret_value)
@@ -2153,10 +2493,21 @@ static herr_t
 H5D__final_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info, hsize_t mpi_buf_count,
                          MPI_Datatype mpi_file_type, MPI_Datatype mpi_buf_type)
 {
+#ifdef H5Dmpio_DEBUG
+    int mpi_rank;
+#endif
     herr_t ret_value = SUCCEED;
 
     FUNC_ENTER_STATIC
 
+#ifdef H5Dmpio_DEBUG
+    mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file);
+    H5D_MPIO_TRACE_ENTER(mpi_rank);
+    H5D_MPIO_TIME_START(mpi_rank, "Final collective I/O");
+    if (mpi_rank < 0)
+        HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain MPI rank")
+#endif
+
     /* Pass buf type, file type to the file driver.  */
     if (H5CX_set_mpi_coll_datatypes(mpi_buf_type, mpi_file_type) < 0)
         HGOTO_ERROR(H5E_DATASET, H5E_CANTSET, FAIL, "can't set MPI-I/O collective I/O datatypes")
@@ -2171,10 +2522,12 @@ H5D__final_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_inf
     } /* end else */
 
 done:
-#ifdef H5D_DEBUG
-    if (H5DEBUG(D))
-        HDfprintf(H5DEBUG(D), "ret_value before leaving final_collective_io=%d\n", ret_value);
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TIME_STOP(mpi_rank);
+    H5D_MPIO_DEBUG_VA(mpi_rank, "ret_value before leaving final_collective_io=%d", ret_value);
+    H5D_MPIO_TRACE_EXIT(mpi_rank);
 #endif
+
     FUNC_LEAVE_NOAPI(ret_value)
 } /* end H5D__final_collective_io */
 
@@ -2216,62 +2569,149 @@ H5D__cmp_chunk_addr(const void *chunk_addr_info1, const void *chunk_addr_info2)
  *
  * Return:      -1, 0, 1
  *
- * Programmer:  Jordan Henderson
- *              Wednesday, Nov. 30th, 2016
- *
  *-------------------------------------------------------------------------
  */
 static int
 H5D__cmp_filtered_collective_io_info_entry(const void *filtered_collective_io_info_entry1,
                                            const void *filtered_collective_io_info_entry2)
 {
-    haddr_t addr1 = HADDR_UNDEF, addr2 = HADDR_UNDEF;
+    const H5D_filtered_collective_io_info_t *entry1;
+    const H5D_filtered_collective_io_info_t *entry2;
+    haddr_t                                  addr1 = HADDR_UNDEF;
+    haddr_t                                  addr2 = HADDR_UNDEF;
+    int                                      ret_value;
 
     FUNC_ENTER_STATIC_NOERR
 
-    addr1 = ((const H5D_filtered_collective_io_info_t *)filtered_collective_io_info_entry1)
-                ->chunk_states.new_chunk.offset;
-    addr2 = ((const H5D_filtered_collective_io_info_t *)filtered_collective_io_info_entry2)
-                ->chunk_states.new_chunk.offset;
+    entry1 = (const H5D_filtered_collective_io_info_t *)filtered_collective_io_info_entry1;
+    entry2 = (const H5D_filtered_collective_io_info_t *)filtered_collective_io_info_entry2;
 
-    FUNC_LEAVE_NOAPI(H5F_addr_cmp(addr1, addr2))
-} /* end H5D__cmp_filtered_collective_io_info_entry() */
+    addr1 = entry1->chunk_new.offset;
+    addr2 = entry2->chunk_new.offset;
 
-#if MPI_VERSION >= 3
+    /*
+     * If both chunk addresses are defined, H5F_addr_cmp is safe to use.
+     * Otherwise, if neither address is defined, compare the chunk
+     * entries based on their chunk index. Finally, if only one chunk
+     * address is defined, the entry with the defined address is
+     * ordered after the entry with the undefined address.
+     */
+    if (H5F_addr_defined(addr1) && H5F_addr_defined(addr2)) {
+        ret_value = H5F_addr_cmp(addr1, addr2);
+    }
+    else if (!H5F_addr_defined(addr1) && !H5F_addr_defined(addr2)) {
+        hsize_t chunk_idx1 = entry1->index_info.chunk_idx;
+        hsize_t chunk_idx2 = entry2->index_info.chunk_idx;
+
+        ret_value = (chunk_idx1 > chunk_idx2) - (chunk_idx1 < chunk_idx2);
+    }
+    else
+        ret_value = H5F_addr_defined(addr1) ? 1 : -1;
+
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5D__cmp_filtered_collective_io_info_entry() */
 
 /*-------------------------------------------------------------------------
- * Function:    H5D__cmp_filtered_collective_io_info_entry_owner
+ * Function:    H5D__cmp_chunk_redistribute_info
  *
- * Purpose:     Routine to compare filtered collective chunk io info
- *              entries's original owner fields
+ * Purpose:     Routine to compare two H5D_chunk_redistribute_info_t
+ *              structures by chunk index, then by original owner
  *
- * Description: Callback for qsort() to compare filtered collective chunk
- *              io info entries's original owner fields
+ * Description: Callback for qsort() that orders entries by chunk_idx;
+ *              entries with equal chunk_idx are ordered by orig_owner
+ *
+ * Return:      -1, 0, 1
+ *
+ *-------------------------------------------------------------------------
+ */
+static int
+H5D__cmp_chunk_redistribute_info(const void *_entry1, const void *_entry2)
+{
+    const H5D_chunk_redistribute_info_t *entry1;
+    const H5D_chunk_redistribute_info_t *entry2;
+    hsize_t                              chunk_index1;
+    hsize_t                              chunk_index2;
+    int                                  ret_value;
+
+    FUNC_ENTER_STATIC_NOERR
+
+    entry1 = (const H5D_chunk_redistribute_info_t *)_entry1;
+    entry2 = (const H5D_chunk_redistribute_info_t *)_entry2;
+
+    chunk_index1 = entry1->chunk_idx;
+    chunk_index2 = entry2->chunk_idx;
+
+    if (chunk_index1 == chunk_index2) {
+        int orig_owner1 = entry1->orig_owner;
+        int orig_owner2 = entry2->orig_owner;
+
+        ret_value = (orig_owner1 > orig_owner2) - (orig_owner1 < orig_owner2);
+    }
+    else
+        ret_value = (chunk_index1 > chunk_index2) - (chunk_index1 < chunk_index2);
+
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5D__cmp_chunk_redistribute_info() */
+
+/*-------------------------------------------------------------------------
+ * Function:    H5D__cmp_chunk_redistribute_info_orig_owner
  *
- * Return:      The difference between the two
- *              H5D_filtered_collective_io_info_t's original owner fields
+ * Purpose:     Routine to compare the original owning MPI rank for two
+ *              H5D_chunk_redistribute_info_t structures
  *
- * Programmer:  Jordan Henderson
- *              Monday, Apr. 10th, 2017
+ * Description: Callback for qsort() that orders entries by orig_owner.
+ *              Entries with the same owner are ordered by chunk_block
+ *              offset, or by chunk_idx if neither offset is defined
+ *
+ * Return:      -1, 0, 1
  *
  *-------------------------------------------------------------------------
  */
 static int
-H5D__cmp_filtered_collective_io_info_entry_owner(const void *filtered_collective_io_info_entry1,
-                                                 const void *filtered_collective_io_info_entry2)
+H5D__cmp_chunk_redistribute_info_orig_owner(const void *_entry1, const void *_entry2)
 {
-    int owner1 = -1, owner2 = -1;
+    const H5D_chunk_redistribute_info_t *entry1;
+    const H5D_chunk_redistribute_info_t *entry2;
+    int                                  owner1 = -1;
+    int                                  owner2 = -1;
+    int                                  ret_value;
 
     FUNC_ENTER_STATIC_NOERR
 
-    owner1 = ((const H5D_filtered_collective_io_info_t *)filtered_collective_io_info_entry1)
-                 ->owners.original_owner;
-    owner2 = ((const H5D_filtered_collective_io_info_t *)filtered_collective_io_info_entry2)
-                 ->owners.original_owner;
+    entry1 = (const H5D_chunk_redistribute_info_t *)_entry1;
+    entry2 = (const H5D_chunk_redistribute_info_t *)_entry2;
 
-    FUNC_LEAVE_NOAPI(owner1 - owner2)
-} /* end H5D__cmp_filtered_collective_io_info_entry_owner() */
-#endif
+    owner1 = entry1->orig_owner;
+    owner2 = entry2->orig_owner;
+
+    if (owner1 == owner2) {
+        haddr_t addr1 = entry1->chunk_block.offset;
+        haddr_t addr2 = entry2->chunk_block.offset;
+
+        /*
+         * If both chunk addresses are defined, H5F_addr_cmp is safe to use.
+         * Otherwise, if neither address is defined, compare the chunk
+         * entries based on their chunk index. Finally, if only one chunk
+         * address is defined, the entry with the defined address is
+         * ordered after the entry with the undefined address.
+         */
+        if (H5F_addr_defined(addr1) && H5F_addr_defined(addr2)) {
+            ret_value = H5F_addr_cmp(addr1, addr2);
+        }
+        else if (!H5F_addr_defined(addr1) && !H5F_addr_defined(addr2)) {
+            hsize_t chunk_idx1 = entry1->chunk_idx;
+            hsize_t chunk_idx2 = entry2->chunk_idx;
+
+            ret_value = (chunk_idx1 > chunk_idx2) - (chunk_idx1 < chunk_idx2);
+        }
+        else
+            ret_value = H5F_addr_defined(addr1) ? 1 : -1;
+    }
+    else
+        ret_value = (owner1 > owner2) - (owner1 < owner2);
+
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5D__cmp_chunk_redistribute_info_orig_owner() */
 
 /*-------------------------------------------------------------------------
  * Function:    H5D__sort_chunk
@@ -2300,7 +2740,7 @@ H5D__cmp_filtered_collective_io_info_entry_owner(const void *filtered_collective
  */
 static herr_t
 H5D__sort_chunk(H5D_io_info_t *io_info, const H5D_chunk_map_t *fm,
-                H5D_chunk_addr_info_t chunk_addr_info_array[], int sum_chunk)
+                H5D_chunk_addr_info_t chunk_addr_info_array[], int sum_chunk, int mpi_rank, int mpi_size)
 {
     H5SL_node_t *     chunk_node;           /* Current node in chunk skip list */
     H5D_chunk_info_t *chunk_info;           /* Current chunking info. of this node. */
@@ -2312,17 +2752,12 @@ H5D__sort_chunk(H5D_io_info_t *io_info, const H5D_chunk_map_t *fm,
     hbool_t                 do_sort                = FALSE; /* Whether the addresses need to be sorted */
     int                     bsearch_coll_chunk_threshold;
     int                     many_chunk_opt = H5D_OBTAIN_ONE_CHUNK_ADDR_IND;
-    int                     mpi_size;            /* Number of MPI processes */
     int                     mpi_code;            /* MPI return code */
     int                     i;                   /* Local index variable */
     herr_t                  ret_value = SUCCEED; /* Return value */
 
     FUNC_ENTER_STATIC
 
-    /* Retrieve # of MPI processes */
-    if ((mpi_size = H5F_mpi_get_size(io_info->dset->oloc.file)) < 0)
-        HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi size")
-
     /* Calculate the actual threshold to obtain all chunk addresses collectively
      *  The bigger this number is, the more possible the use of obtaining chunk
      * address collectively.
@@ -2336,28 +2771,20 @@ H5D__sort_chunk(H5D_io_info_t *io_info, const H5D_chunk_map_t *fm,
         ((sum_chunk / mpi_size) >= H5D_ALL_CHUNK_ADDR_THRES_COL_NUM))
         many_chunk_opt = H5D_OBTAIN_ALL_CHUNK_ADDR_COL;
 
-#ifdef H5D_DEBUG
-    if (H5DEBUG(D))
-        HDfprintf(H5DEBUG(D), "many_chunk_opt= %d\n", many_chunk_opt);
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_DEBUG_VA(mpi_rank, "many_chunk_opt = %d", many_chunk_opt);
 #endif
 
     /* If we need to optimize the way to obtain the chunk address */
     if (many_chunk_opt != H5D_OBTAIN_ONE_CHUNK_ADDR_IND) {
-        int mpi_rank;
-
-#ifdef H5D_DEBUG
-        if (H5DEBUG(D))
-            HDfprintf(H5DEBUG(D), "Coming inside H5D_OBTAIN_ALL_CHUNK_ADDR_COL\n");
+#ifdef H5Dmpio_DEBUG
+        H5D_MPIO_DEBUG(mpi_rank, "Coming inside H5D_OBTAIN_ALL_CHUNK_ADDR_COL");
 #endif
         /* Allocate array for chunk addresses */
         if (NULL == (total_chunk_addr_array =
                          (haddr_t *)H5MM_malloc(sizeof(haddr_t) * (size_t)fm->layout->u.chunk.nchunks)))
             HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, "unable to allocate memory chunk address array")
 
-        /* Retrieve all the chunk addresses with process 0 */
-        if ((mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file)) < 0)
-            HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi rank")
-
         if (mpi_rank == 0) {
             herr_t result;
 
@@ -2437,10 +2864,10 @@ H5D__sort_chunk(H5D_io_info_t *io_info, const H5D_chunk_map_t *fm,
         chunk_node = H5SL_next(chunk_node);
     } /* end while */
 
-#ifdef H5D_DEBUG
-    if (H5DEBUG(D))
-        HDfprintf(H5DEBUG(D), "before Qsort\n");
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_DEBUG(mpi_rank, "before Qsort");
 #endif
+
     if (do_sort) {
         size_t num_chunks = H5SL_count(fm->sel_chunks);
 
@@ -2497,7 +2924,7 @@ done:
  */
 static herr_t
 H5D__obtain_mpio_mode(H5D_io_info_t *io_info, H5D_chunk_map_t *fm, uint8_t assign_io_mode[],
-                      haddr_t chunk_addr[])
+                      haddr_t chunk_addr[], int mpi_rank, int mpi_size)
 {
     size_t                  total_chunks;
     unsigned                percent_nproc_per_chunk, threshold_nproc_per_chunk;
@@ -2510,7 +2937,6 @@ H5D__obtain_mpio_mode(H5D_io_info_t *io_info, H5D_chunk_map_t *fm, uint8_t assig
     H5P_coll_md_read_flag_t md_reads_file_flag;
     hbool_t                 md_reads_context_flag;
     hbool_t                 restore_md_reads_state = FALSE;
-    int                     mpi_size, mpi_rank;
     MPI_Comm                comm;
     int                     root;
     size_t                  ic;
@@ -2523,12 +2949,6 @@ H5D__obtain_mpio_mode(H5D_io_info_t *io_info, H5D_chunk_map_t *fm, uint8_t assig
     root = 0;
     comm = io_info->comm;
 
-    /* Obtain the number of process and the current rank of the process */
-    if ((mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file)) < 0)
-        HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi rank")
-    if ((mpi_size = H5F_mpi_get_size(io_info->dset->oloc.file)) < 0)
-        HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi size")
-
     /* Setup parameters */
     H5_CHECKED_ASSIGN(total_chunks, size_t, fm->layout->u.chunk.nchunks, hsize_t);
     if (H5CX_get_mpio_chunk_opt_ratio(&percent_nproc_per_chunk) < 0)
@@ -2672,34 +3092,32 @@ done:
 } /* end H5D__obtain_mpio_mode() */
 
 /*-------------------------------------------------------------------------
- * Function:    H5D__construct_filtered_io_info_list
+ * Function:    H5D__mpio_collective_filtered_chunk_io_setup
  *
  * Purpose:     Constructs a list of entries which contain the necessary
  *              information for inter-process communication when performing
  *              collective io on filtered chunks. This list is used by
- *              each process when performing I/O on locally selected chunks
- *              and also in operations that must be collectively done
- *              on every chunk, such as chunk re-allocation, insertion of
- *              chunks into the chunk index, etc.
+ *              each MPI rank when performing I/O on locally selected
+ *              chunks and also in operations that must be collectively
+ *              done on every chunk, such as chunk re-allocation, insertion
+ *              of chunks into the chunk index, etc.
  *
  * Return:      Non-negative on success/Negative on failure
  *
- * Programmer:  Jordan Henderson
- *              Tuesday, January 10th, 2017
- *
  *-------------------------------------------------------------------------
  */
 static herr_t
-H5D__construct_filtered_io_info_list(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
-                                     const H5D_chunk_map_t *             fm,
-                                     H5D_filtered_collective_io_info_t **chunk_list, size_t *num_entries)
+H5D__mpio_collective_filtered_chunk_io_setup(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
+                                             const H5D_chunk_map_t *             fm,
+                                             H5D_filtered_collective_io_info_t **chunk_list,
+                                             size_t *num_entries, int mpi_rank)
 {
-    H5D_filtered_collective_io_info_t *local_info_array =
-        NULL; /* The list of initially selected chunks for this process */
-    size_t num_chunks_selected;
-    size_t i;
-    int    mpi_rank;
-    herr_t ret_value = SUCCEED;
+    H5D_filtered_collective_io_info_t *local_info_array = NULL;
+    H5D_chunk_ud_t                     udata;
+    hbool_t                            filter_partial_edge_chunks;
+    size_t                             num_chunks_selected;
+    size_t                             i;
+    herr_t                             ret_value = SUCCEED;
 
     FUNC_ENTER_STATIC
 
@@ -2709,19 +3127,23 @@ H5D__construct_filtered_io_info_list(const H5D_io_info_t *io_info, const H5D_typ
     HDassert(chunk_list);
     HDassert(num_entries);
 
-    if ((mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file)) < 0)
-        HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi rank")
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TRACE_ENTER(mpi_rank);
+    H5D_MPIO_TIME_START(mpi_rank, "Filtered Collective I/O Setup");
+#endif
 
-    /* Each process builds a local list of the chunks they have selected */
+    /* Each rank builds a local list of the chunks they have selected */
     if ((num_chunks_selected = H5SL_count(fm->sel_chunks))) {
         H5D_chunk_info_t *chunk_info;
-        H5D_chunk_ud_t    udata;
         H5SL_node_t *     chunk_node;
         hsize_t           select_npoints;
-        hssize_t          chunk_npoints;
+        hbool_t           need_sort = FALSE;
+
+        /* Determine whether partial edge chunks should be filtered */
+        filter_partial_edge_chunks = !(io_info->dset->shared->layout.u.chunk.flags &
+                                       H5O_LAYOUT_CHUNK_DONT_FILTER_PARTIAL_BOUND_CHUNKS);
 
-        if (NULL == (local_info_array = (H5D_filtered_collective_io_info_t *)H5MM_malloc(
-                         num_chunks_selected * sizeof(H5D_filtered_collective_io_info_t))))
+        if (NULL == (local_info_array = H5MM_malloc(num_chunks_selected * sizeof(*local_info_array))))
             HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate local io info array buffer")
 
         chunk_node = H5SL_first(fm->sel_chunks);
@@ -2732,743 +3154,2853 @@ H5D__construct_filtered_io_info_list(const H5D_io_info_t *io_info, const H5D_typ
             if (H5D__chunk_lookup(io_info->dset, chunk_info->scaled, &udata) < 0)
                 HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "error looking up chunk address")
 
-            local_info_array[i].index                      = chunk_info->index;
-            local_info_array[i].chunk_states.chunk_current = local_info_array[i].chunk_states.new_chunk =
-                udata.chunk_block;
-            local_info_array[i].num_writers           = 0;
-            local_info_array[i].owners.original_owner = local_info_array[i].owners.new_owner = mpi_rank;
-            local_info_array[i].buf                                                          = NULL;
-
-            local_info_array[i].async_info.num_receive_requests   = 0;
-            local_info_array[i].async_info.receive_buffer_array   = NULL;
-            local_info_array[i].async_info.receive_requests_array = NULL;
-
-            H5MM_memcpy(local_info_array[i].scaled, chunk_info->scaled, sizeof(chunk_info->scaled));
-
-            select_npoints              = H5S_GET_SELECT_NPOINTS(chunk_info->mspace);
-            local_info_array[i].io_size = (size_t)select_npoints * type_info->src_type_size;
-
-            /* Currently the full overwrite status of a chunk is only obtained on a per-process
-             * basis. This means that if the total selection in the chunk, as determined by the combination
-             * of selections of all of the processes interested in the chunk, covers the entire chunk,
-             * the performance optimization of not reading the chunk from the file is still valid, but
-             * is not applied in the current implementation. Something like an appropriately placed
-             * MPI_Allreduce or a running total of the number of chunk points selected during chunk
-             * redistribution should suffice for implementing this case - JTH.
+            /* Initialize rank-local chunk info */
+            local_info_array[i].chunk_info     = chunk_info;
+            local_info_array[i].chunk_buf_size = 0;
+            local_info_array[i].num_writers    = 0;
+            local_info_array[i].orig_owner     = mpi_rank;
+            local_info_array[i].new_owner      = mpi_rank;
+            local_info_array[i].buf            = NULL;
+
+            select_npoints              = H5S_GET_SELECT_NPOINTS(chunk_info->fspace);
+            local_info_array[i].io_size = (size_t)select_npoints * type_info->dst_type_size;
+
+            /*
+             * Determine whether this chunk will need to be read from the file. If this is
+             * a read operation, the chunk will be read. If this is a write operation, we
+             * generally need to read a filtered chunk from the file before modifying it,
+             * unless the chunk is being fully overwritten.
+             *
+             * TODO: Currently the full overwrite status of a chunk is only obtained on a
+             * per-rank basis. This means that if the total selection in the chunk, as
+             * determined by the combination of selections of all of the ranks interested in
+             * the chunk, covers the entire chunk, the performance optimization of not reading
+             * the chunk from the file is still valid, but is not applied in the current
+             * implementation.
+             *
+             * To implement this case, a few approaches were considered:
+             *
+             *  - Keep a running total (distributed to each rank) of the number of chunk
+             *    elements selected during chunk redistribution and compare that to the total
+             *    number of elements in the chunk once redistribution is finished
+             *
+             *  - Process all incoming chunk messages before doing I/O (these are currently
+             *    processed AFTER doing I/O), combine the owning rank's selection in a chunk
+             *    with the selections received from other ranks and check to see whether that
+             *    combined selection covers the entire chunk
+             *
+             * The first approach will be dangerous if the application performs an overlapping
+             * write to a chunk, as the number of selected elements can equal or exceed the
+             * number of elements in the chunk without the whole chunk selection being covered.
+             * While it might be considered erroneous for an application to do an overlapping
+             * write, we don't explicitly disallow it.
+             *
+             * The second approach contains a bit of complexity in that part of the chunk
+             * messages will be needed before doing I/O and part will be needed after doing I/O.
+             * Since modification data from chunk messages can't be applied until after any I/O
+             * is performed (otherwise, we'll overwrite any applied modification data), chunk
+             * messages are currently entirely processed after I/O. However, in order to determine
+             * if a chunk is being fully overwritten, we need the dataspace portion of the chunk
+             * messages before doing I/O. The naive way to do this is to process chunk messages
+             * twice, using just the relevant information from the message before and after I/O.
+             * The better way would be to avoid processing chunk messages twice by extracting (and
+             * keeping around) the dataspace portion of the message before I/O and processing the
+             * rest of the chunk message after I/O. Note that the dataspace portion of each chunk
+             * message is used to correctly apply chunk modification data from the message, so
+             * must be kept around both before and after I/O in this case.
              */
-            if ((chunk_npoints = H5S_GET_EXTENT_NPOINTS(chunk_info->fspace)) < 0)
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTCOUNT, FAIL, "dataspace is invalid")
-            local_info_array[i].full_overwrite =
-                (local_info_array[i].io_size >= (hsize_t)chunk_npoints * type_info->dst_type_size) ? TRUE
-                                                                                                   : FALSE;
+            if (io_info->op_type == H5D_IO_OP_READ)
+                local_info_array[i].need_read = TRUE;
+            else {
+                local_info_array[i].need_read =
+                    local_info_array[i].io_size < (size_t)io_info->dset->shared->layout.u.chunk.size;
+            }
 
-            chunk_node = H5SL_next(chunk_node);
-        } /* end for */
-    }     /* end if */
+            local_info_array[i].skip_filter_pline = FALSE;
+            if (!filter_partial_edge_chunks) {
+                /*
+                 * If this is a partial edge chunk and the "don't filter partial edge
+                 * chunks" flag is set, make sure not to apply filters to the chunk.
+                 */
+                if (H5D__chunk_is_partial_edge_chunk(io_info->dset->shared->ndims,
+                                                     io_info->dset->shared->layout.u.chunk.dim,
+                                                     chunk_info->scaled, io_info->dset->shared->curr_dims))
+                    local_info_array[i].skip_filter_pline = TRUE;
+            }
 
-    /* Redistribute shared chunks to new owners as necessary */
-    if (io_info->op_type == H5D_IO_OP_WRITE)
-#if MPI_VERSION >= 3
-        if (H5D__chunk_redistribute_shared_chunks(io_info, type_info, fm, local_info_array,
-                                                  &num_chunks_selected) < 0)
-            HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "unable to redistribute shared chunks")
-#else
-        HGOTO_ERROR(
-            H5E_DATASET, H5E_WRITEERROR, FAIL,
-            "unable to redistribute shared chunks - MPI version < 3 (MPI_Mprobe and MPI_Imrecv missing)")
-#endif
+            /* Initialize the chunk's shared info */
+            local_info_array[i].chunk_current = udata.chunk_block;
+            local_info_array[i].chunk_new     = udata.chunk_block;
 
-    *chunk_list  = local_info_array;
-    *num_entries = num_chunks_selected;
+            /*
+             * Check if the list is not in ascending order of offset in the file
+             * or has unallocated chunks. In either case, the list should get
+             * sorted.
+             */
+            if (i) {
+                haddr_t curr_chunk_offset = local_info_array[i].chunk_current.offset;
+                haddr_t prev_chunk_offset = local_info_array[i - 1].chunk_current.offset;
 
-done:
-    FUNC_LEAVE_NOAPI(ret_value)
-} /* end H5D__construct_filtered_io_info_list() */
+                if (!H5F_addr_defined(prev_chunk_offset) || !H5F_addr_defined(curr_chunk_offset) ||
+                    (curr_chunk_offset < prev_chunk_offset))
+                    need_sort = TRUE;
+            }
 
-#if MPI_VERSION >= 3
+            /*
+             * Extensible arrays may calculate a chunk's index a little differently
+             * than normal when the dataset's unlimited dimension is not the
+             * slowest-changing dimension, so set the index here based on what the
+             * extensible array code calculated instead of what was calculated
+             * in the chunk file mapping.
+             */
+            if (io_info->dset->shared->layout.u.chunk.idx_type == H5D_CHUNK_IDX_EARRAY)
+                local_info_array[i].index_info.chunk_idx = udata.chunk_idx;
+            else
+                local_info_array[i].index_info.chunk_idx = chunk_info->index;
 
-/*-------------------------------------------------------------------------
- * Function:    H5D__chunk_redistribute_shared_chunks
- *
- * Purpose:     When performing a collective write on a Dataset with
- *              filters applied, this function is used to redistribute any
- *              chunks which are selected by more than one process, so as
- *              to preserve file integrity after the write by ensuring
- *              that any shared chunks are only modified by one process.
- *
- *              The current implementation follows this 3-phase process:
- *
- *              - Collect everyone's list of chunks into one large list,
- *                sort the list in increasing order of chunk offset in the
- *                file and hand the list off to rank 0
- *
- *              - Rank 0 scans the list looking for matching runs of chunk
- *                offset in the file (corresponding to a shared chunk which
- *                has been selected by more than one rank in the I/O
- *                operation) and for each shared chunk, it redistributes
- *                the chunk to the process writing to the chunk which
- *                currently has the least amount of chunks assigned to it
- *                by modifying the "new_owner" field in each of the list
- *                entries corresponding to that chunk
- *
- *              - After the chunks have been redistributed, rank 0 re-sorts
- *                the list in order of previous owner so that each rank
- *                will get back exactly the array that they contributed to
- *                the redistribution operation, with the "new_owner" field
- *                of each chunk they are modifying having possibly been
- *                modified. Rank 0 then scatters each segment of the list
- *                back to its corresponding rank
+            local_info_array[i].index_info.filter_mask = udata.filter_mask;
+            local_info_array[i].index_info.need_insert = FALSE;
+
+            chunk_node = H5SL_next(chunk_node);
+        }
+
+        /* Ensure the chunk list is sorted in ascending order of offset in the file */
+        if (need_sort)
+            HDqsort(local_info_array, num_chunks_selected, sizeof(H5D_filtered_collective_io_info_t),
+                    H5D__cmp_filtered_collective_io_info_entry);
+
+#ifdef H5Dmpio_DEBUG
+        H5D__mpio_dump_collective_filtered_chunk_list(local_info_array, num_chunks_selected, mpi_rank);
+#endif
+    }
+    else if (H5F_get_coll_metadata_reads(io_info->dset->oloc.file)) {
+        hsize_t scaled[H5O_LAYOUT_NDIMS] = {0};
+
+        /*
+         * If this rank has no selection in the dataset and collective
+         * metadata reads are enabled, do a fake lookup of a chunk to
+         * ensure that this rank has the chunk index opened. Otherwise,
+         * only the ranks that had a selection will have opened the
+         * chunk index and they will have done so independently. Therefore,
+         * when ranks with no selection participate in later collective
+         * metadata reads, they will try to open the chunk index collectively
+         * and issues will occur since other ranks won't participate.
+         *
+         * In the future, we should consider having a chunk index "open"
+         * callback that can be used to ensure collectivity between ranks
+         * in a more natural way, but this hack should suffice for now.
+         */
+        if (H5D__chunk_lookup(io_info->dset, scaled, &udata) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "error looking up chunk address")
+    }
+
+    *chunk_list  = local_info_array;
+    *num_entries = num_chunks_selected;
+
+done:
+    /*
+     * On failure the list is never handed back through chunk_list, so it
+     * must be released here or it is leaked.
+     */
+    if (ret_value < 0)
+        local_info_array = H5MM_xfree(local_info_array);
+
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TIME_STOP(mpi_rank);
+    H5D_MPIO_TRACE_EXIT(mpi_rank);
+#endif
+
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5D__mpio_collective_filtered_chunk_io_setup() */
+
+/*-------------------------------------------------------------------------
+ * Function:    H5D__mpio_redistribute_shared_chunks
+ *
+ * Purpose:     During a parallel write to a chunked dataset with filters
+ *              applied, each chunk must be written by a single MPI rank
+ *              to avoid data races on the chunk. This function assigns
+ *              ownership of every chunk selected by more than one rank
+ *              to exactly one of those ranks.
+ *
+ *              An initial Allgather determines how many chunks each rank
+ *              has selected; the total over all ranks is compared with
+ *              H5D_CHUNK_REDISTRIBUTE_THRES to decide whether to
+ *              redistribute on all ranks (total below the threshold) or
+ *              on rank 0 only. On success, a non-NULL rank_chunks_assigned_map
+ *              receives the rank -> assigned chunk count array (not freed here).
+ *
+ * Return:      Non-negative on success/Negative on failure
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5D__mpio_redistribute_shared_chunks(H5D_filtered_collective_io_info_t *chunk_list,
+                                     size_t chunk_list_num_entries, const H5D_io_info_t *io_info,
+                                     const H5D_chunk_map_t *fm, int mpi_rank, int mpi_size,
+                                     size_t **rank_chunks_assigned_map)
+{
+    hbool_t redistribute_on_all_ranks;
+    size_t *num_chunks_map       = NULL;
+    size_t  coll_chunk_list_size = 0;
+    size_t  i;
+    int     mpi_code;
+    herr_t  ret_value = SUCCEED;
+
+    FUNC_ENTER_STATIC
+
+    HDassert(chunk_list || 0 == chunk_list_num_entries);
+    HDassert(io_info);
+    HDassert(fm);
+    HDassert(mpi_size > 1); /* No chunk sharing is possible for MPI Comm size of 1 */
+
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TRACE_ENTER(mpi_rank);
+    H5D_MPIO_TIME_START(mpi_rank, "Redistribute shared chunks");
+#endif
+
+    /*
+     * Allocate an array with one entry per MPI rank, used to track the
+     * number of chunks assigned to each rank in order to cut down on
+     * future MPI communication.
+     */
+    if (NULL == (num_chunks_map = H5MM_malloc((size_t)mpi_size * sizeof(*num_chunks_map))))
+        HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't allocate assigned chunks array")
+
+    /* Gather every rank's selected chunk count; the sum is the collective chunk list size */
+    if (MPI_SUCCESS != (mpi_code = MPI_Allgather(&chunk_list_num_entries, 1, H5_SIZE_T_AS_MPI_TYPE,
+                                                 num_chunks_map, 1, H5_SIZE_T_AS_MPI_TYPE, io_info->comm)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Allgather failed", mpi_code)
+
+    for (i = 0; i < (size_t)mpi_size; i++)
+        coll_chunk_list_size += num_chunks_map[i];
+
+    /*
+     * Determine whether we should perform chunk redistribution on all
+     * ranks or just rank 0. For a relatively small number of chunks,
+     * we redistribute on all ranks to cut down on MPI communication
+     * overhead. For a larger number of chunks, we redistribute on
+     * rank 0 only to cut down on memory usage.
+     */
+    redistribute_on_all_ranks = coll_chunk_list_size < H5D_CHUNK_REDISTRIBUTE_THRES;
+
+    if (H5D__mpio_redistribute_shared_chunks_int(chunk_list, num_chunks_map, redistribute_on_all_ranks,
+                                                 io_info, fm, mpi_rank, mpi_size) < 0)
+        HGOTO_ERROR(H5E_DATASET, H5E_CANTREDISTRIBUTE, FAIL, "can't redistribute shared chunks")
+
+    /*
+     * If the caller provided a pointer for the mapping from
+     * rank value -> number of chunks assigned, return that
+     * mapping here.
+     */
+    if (rank_chunks_assigned_map) {
+        /*
+         * If we performed chunk redistribution on rank 0 only, distribute
+         * the rank value -> number of chunks assigned mapping back to all
+         * ranks.
+         */
+        if (!redistribute_on_all_ranks) {
+            if (MPI_SUCCESS !=
+                (mpi_code = MPI_Bcast(num_chunks_map, mpi_size, H5_SIZE_T_AS_MPI_TYPE, 0, io_info->comm)))
+                HMPI_GOTO_ERROR(FAIL, "couldn't broadcast chunk mapping to other ranks", mpi_code)
+        }
+
+        *rank_chunks_assigned_map = num_chunks_map;
+    }
+
+done:
+    if (!rank_chunks_assigned_map || (ret_value < 0)) {
+        num_chunks_map = H5MM_xfree(num_chunks_map);
+    }
+
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TIME_STOP(mpi_rank);
+    H5D_MPIO_TRACE_EXIT(mpi_rank);
+#endif
+
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5D__mpio_redistribute_shared_chunks() */
+
+/*-------------------------------------------------------------------------
+ * Function:    H5D__mpio_redistribute_shared_chunks_int
+ *
+ * Purpose:     Routine to perform redistribution of shared chunks during
+ *              parallel writes to datasets with filters applied.
+ *
+ *              If `all_ranks_involved` is TRUE, chunk redistribution
+ *              occurs on all MPI ranks. This is usually done when there
+ *              is a relatively small number of chunks involved in order to
+ *              cut down on MPI communication overhead while increasing
+ *              total memory usage a bit.
+ *
+ *              If `all_ranks_involved` is FALSE, only rank 0 will perform
+ *              chunk redistribution. This is usually done when there is
+ *              a relatively large number of chunks involved in order to
+ *              cut down on total memory usage at the cost of increased
+ *              overhead from MPI communication.
+ *
+ *              This implementation is as follows:
+ *
+ *              - All MPI ranks send their list of selected chunks to the
+ *                ranks involved in chunk redistribution. Then, the
+ *                involved ranks sort this new list in order of chunk
+ *                index.
+ *
+ *              - The involved ranks scan the list looking for matching
+ *                runs of chunk index values (corresponding to a shared
+ *                chunk which has been selected by more than one rank in
+ *                the I/O operation) and for each shared chunk,
+ *                redistribute the chunk to the MPI rank writing to the
+ *                chunk which currently has the least amount of chunks
+ *                assigned to it. This is done by modifying the "new_owner"
+ *                field in each of the list entries corresponding to that
+ *                chunk. The involved ranks then re-sort the list in order
+ *                of original chunk owner so that each rank's section of
+ *                contributed chunks is contiguous in the collective chunk
+ *                list.
+ *
+ *              - If chunk redistribution occurred on all ranks, each rank
+ *                scans through the collective chunk list to find their
+ *                contributed section of chunks and uses that to update
+ *                their local chunk list with the newly-updated "new_owner"
+ *                and "num_writers" fields. If chunk redistribution
+ *                occurred only on rank 0, an MPI_Scatterv operation will
+ *                be used to scatter the segments of the collective chunk
+ *                list from rank 0 back to the corresponding ranks.
+ *
+ * Return:      Non-negative on success/Negative on failure
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5D__mpio_redistribute_shared_chunks_int(H5D_filtered_collective_io_info_t *chunk_list,
+                                         size_t *num_chunks_assigned_map, hbool_t all_ranks_involved,
+                                         const H5D_io_info_t *io_info, const H5D_chunk_map_t *fm,
+                                         int mpi_rank, int mpi_size)
+{
+    MPI_Datatype struct_type;
+    MPI_Datatype packed_type;
+    hbool_t      struct_type_derived = FALSE;
+    hbool_t      packed_type_derived = FALSE;
+    size_t       i;
+    size_t       coll_chunk_list_num_entries = 0;
+    void *       coll_chunk_list             = NULL;
+    int *        counts_disps_array          = NULL;
+    int *        counts_ptr                  = NULL;
+    int *        displacements_ptr           = NULL;
+    int          num_chunks_int;
+    int          mpi_code;
+    herr_t       ret_value = SUCCEED;
+
+    FUNC_ENTER_STATIC
+
+    HDassert(num_chunks_assigned_map);
+    HDassert(chunk_list || 0 == num_chunks_assigned_map[mpi_rank]);
+    HDassert(io_info);
+    HDassert(fm);
+    HDassert(mpi_size > 1);
+
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TRACE_ENTER(mpi_rank);
+    H5D_MPIO_TIME_START(mpi_rank, "Redistribute shared chunks (internal)");
+#endif
+
+    /*
+     * Make sure it's safe to cast this rank's number
+     * of chunks to be sent into an int for MPI
+     */
+    H5_CHECKED_ASSIGN(num_chunks_int, int, num_chunks_assigned_map[mpi_rank], size_t);
+
+    /*
+     * Phase 1 - Participate in collective gathering of every rank's
+     * list of chunks to the ranks which are performing the redistribution
+     * operation.
+     */
+
+    if (all_ranks_involved || (mpi_rank == 0)) {
+        /*
+         * Allocate array to store the receive counts of each rank, as well as
+         * the displacements into the final array where each rank will place
+         * their data. The first half of the array contains the receive counts
+         * (in rank order), while the latter half contains the displacements
+         * (also in rank order).
+         */
+        if (NULL == (counts_disps_array = H5MM_malloc(2 * (size_t)mpi_size * sizeof(*counts_disps_array)))) {
+            /* Push an error, but still participate in collective gather operation */
+            HDONE_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+                        "couldn't allocate receive counts and displacements array")
+        }
+        else {
+            /* Set the receive counts from the assigned chunks map */
+            counts_ptr = counts_disps_array;
+
+            for (i = 0; i < (size_t)mpi_size; i++)
+                H5_CHECKED_ASSIGN(counts_ptr[i], int, num_chunks_assigned_map[i], size_t);
+
+            /* Set the displacements into the receive buffer for the gather operation */
+            displacements_ptr = &counts_disps_array[mpi_size];
+
+            *displacements_ptr = 0;
+            for (i = 1; i < (size_t)mpi_size; i++)
+                displacements_ptr[i] = displacements_ptr[i - 1] + counts_ptr[i - 1];
+        }
+    }
+
+    /*
+     * Construct MPI derived types for extracting information
+     * necessary for MPI communication
+     */
+    if (H5D__mpio_get_chunk_redistribute_info_types(&packed_type, &packed_type_derived, &struct_type,
+                                                    &struct_type_derived) < 0)
+        HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL,
+                    "can't create derived datatypes for chunk redistribution info")
+
+    /* Perform gather operation */
+    if (H5_mpio_gatherv_alloc(chunk_list, num_chunks_int, struct_type, counts_ptr, displacements_ptr,
+                              packed_type, all_ranks_involved, 0, io_info->comm, mpi_rank, mpi_size,
+                              &coll_chunk_list, &coll_chunk_list_num_entries) < 0)
+        HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL,
+                    "can't gather chunk redistribution info to involved ranks")
+
+    /*
+     * If all ranks are redistributing shared chunks, we no
+     * longer need the receive counts and displacements array
+     */
+    if (all_ranks_involved) {
+        counts_disps_array = H5MM_xfree(counts_disps_array);
+    }
+
+    /*
+     * Phase 2 - Involved ranks now redistribute any shared chunks to new
+     * owners as necessary.
+     */
+
+    if (all_ranks_involved || (mpi_rank == 0)) {
+        H5D_chunk_redistribute_info_t *chunk_entry;
+        hsize_t                        curr_chunk_idx;
+        size_t                         set_begin_index;
+        int                            num_writers;
+        int                            new_chunk_owner;
+
+        /* Clear the mapping from rank value -> number of assigned chunks */
+        HDmemset(num_chunks_assigned_map, 0, (size_t)mpi_size * sizeof(*num_chunks_assigned_map));
+
+        /* Sort collective chunk list according to chunk index */
+        HDqsort(coll_chunk_list, coll_chunk_list_num_entries, sizeof(H5D_chunk_redistribute_info_t),
+                H5D__cmp_chunk_redistribute_info);
+
+        /*
+         * Process all chunks in the collective chunk list.
+         * Note that the loop counter is incremented by both
+         * the outer loop (while processing each entry in
+         * the collective chunk list) and the inner loop
+         * (while processing duplicate entries for shared
+         * chunks).
+         */
+        chunk_entry = &((H5D_chunk_redistribute_info_t *)coll_chunk_list)[0];
+        for (i = 0; i < coll_chunk_list_num_entries;) {
+            /* Set chunk's initial new owner to its original owner */
+            new_chunk_owner = chunk_entry->orig_owner;
+
+            /*
+             * Set the current chunk index so we know when we've processed
+             * all duplicate entries for a particular shared chunk
+             */
+            curr_chunk_idx = chunk_entry->chunk_idx;
+
+            /* Reset the initial number of writers to this chunk */
+            num_writers = 0;
+
+            /* Set index for the beginning of this section of duplicate chunk entries */
+            set_begin_index = i;
+
+            /*
+             * Process each chunk entry in the set for the current
+             * (possibly shared) chunk and increment the loop counter
+             * while doing so.
+             */
+            do {
+                /*
+                 * The new owner of the chunk is determined by the rank
+                 * writing to the chunk which currently has the least amount
+                 * of chunks assigned to it
+                 */
+                if (num_chunks_assigned_map[chunk_entry->orig_owner] <
+                    num_chunks_assigned_map[new_chunk_owner])
+                    new_chunk_owner = chunk_entry->orig_owner;
+
+                /* Update the number of writers to this particular chunk */
+                num_writers++;
+
+                chunk_entry++;
+            } while (++i < coll_chunk_list_num_entries && chunk_entry->chunk_idx == curr_chunk_idx);
+
+            /* We should never have more writers to a chunk than the number of MPI ranks */
+            HDassert(num_writers <= mpi_size);
+
+            /* Set all processed chunk entries' "new_owner" and "num_writers" fields */
+            for (; set_begin_index < i; set_begin_index++) {
+                H5D_chunk_redistribute_info_t *entry;
+
+                entry = &((H5D_chunk_redistribute_info_t *)coll_chunk_list)[set_begin_index];
+
+                entry->new_owner   = new_chunk_owner;
+                entry->num_writers = num_writers;
+            }
+
+            /* Update the number of chunks assigned to the MPI rank that now owns this chunk */
+            num_chunks_assigned_map[new_chunk_owner]++;
+        }
+
+        /*
+         * Re-sort the collective chunk list in order of original chunk owner
+         * so that each rank's section of contributed chunks is contiguous in
+         * the collective chunk list.
+         *
+         * NOTE: this re-sort is frail in that it needs to sort the collective
+         *       chunk list so that each rank's section of contributed chunks
+         *       is in the exact order it was contributed in, or things will
+         *       be scrambled when each rank's local chunk list is updated.
+         *       Therefore, the sorting algorithm here is tied to the one
+         *       used during the I/O setup operation. Specifically, chunks
+         *       are first sorted by ascending order of offset in the file and
+         *       then by chunk index. In the future, a better redistribution
+         *       algorithm may be devised that doesn't rely on frail sorting,
+         *       but the current implementation is a quick and naive approach.
+         */
+        HDqsort(coll_chunk_list, coll_chunk_list_num_entries, sizeof(H5D_chunk_redistribute_info_t),
+                H5D__cmp_chunk_redistribute_info_orig_owner);
+    }
+
+    if (all_ranks_involved) {
+        /*
+         * If redistribution occurred on all ranks, search for the section
+         * in the collective chunk list corresponding to this rank's locally
+         * selected chunks and update the local list after redistribution.
+         */
+        for (i = 0; i < coll_chunk_list_num_entries; i++)
+            if (mpi_rank == ((H5D_chunk_redistribute_info_t *)coll_chunk_list)[i].orig_owner)
+                break;
+
+        for (size_t j = 0; j < (size_t)num_chunks_int; j++) {
+            H5D_chunk_redistribute_info_t *coll_entry;
+
+            coll_entry = &((H5D_chunk_redistribute_info_t *)coll_chunk_list)[i++];
+
+            chunk_list[j].new_owner   = coll_entry->new_owner;
+            chunk_list[j].num_writers = coll_entry->num_writers;
+        }
+    }
+    else {
+        /*
+         * If redistribution occurred only on rank 0, scatter the segments
+         * of the collective chunk list back to each rank so that their
+         * local chunk lists get updated
+         */
+        if (MPI_SUCCESS !=
+            (mpi_code = MPI_Scatterv(coll_chunk_list, counts_ptr, displacements_ptr, packed_type, chunk_list,
+                                     num_chunks_int, struct_type, 0, io_info->comm)))
+            HMPI_GOTO_ERROR(FAIL, "unable to scatter shared chunks info buffer", mpi_code)
+    }
+
+#ifdef H5Dmpio_DEBUG
+    H5D__mpio_dump_collective_filtered_chunk_list(chunk_list, num_chunks_assigned_map[mpi_rank], mpi_rank);
+#endif
+
+done:
+    H5MM_free(coll_chunk_list);
+
+    if (struct_type_derived) {
+        if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&struct_type)))
+            HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+    }
+    if (packed_type_derived) {
+        if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&packed_type)))
+            HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+    }
+
+    H5MM_free(counts_disps_array);
+
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TIME_STOP(mpi_rank);
+    H5D_MPIO_TRACE_EXIT(mpi_rank);
+#endif
+
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5D__mpio_redistribute_shared_chunks_int() */
+
+/*-------------------------------------------------------------------------
+ * Function:    H5D__mpio_share_chunk_modification_data
+ *
+ * Purpose:     When performing a parallel write on a chunked dataset with
+ *              filters applied, we must first ensure that any particular
+ *              chunk is only written to by a single MPI rank in order to
+ *              avoid potential data races on the chunk. Once dataset
+ *              chunks have been redistributed in a suitable manner, each
+ *              MPI rank must send its chunk data to other ranks for each
+ *              chunk it no longer owns.
+ *
+ *              The current implementation here follows the Nonblocking
+ *              Consensus algorithm described in:
+ *              http://unixer.de/publications/img/hoefler-dsde-protocols.pdf
+ *
+ *              First, each MPI rank scans through its list of selected
+ *              chunks and does the following for each chunk:
+ *
+ *               * If a chunk in the MPI rank's chunk list is still owned
+ *                 by that rank, the rank checks how many messages are
+ *                 incoming for that chunk and adds that to its running
+ *                 total. Then, the rank updates its local chunk list so
+ *                 that any previous chunk entries for chunks that are no
+ *                 longer owned by the rank get overwritten by chunk
+ *                 entries for chunks the rank still owns. Since the data
+ *                 for the chunks no longer owned will have already been
+ *                 sent, those chunks can effectively be discarded.
+ *               * If a chunk in the MPI rank's chunk list is no longer
+ *                 owned by that rank, the rank sends the data it wishes to
+ *                 update the chunk with to the MPI rank that now has
+ *                 ownership of that chunk. To do this, it encodes the
+ *                 chunk's index, its selection in the chunk and its
+ *                 modification data into a buffer and then posts a
+ *                 non-blocking MPI_Issend to the owning rank.
+ *
+ *              Once this step is complete, all MPI ranks allocate arrays
+ *              to hold chunk message receive buffers and MPI request
+ *              objects for each non-blocking receive they will post for
+ *              incoming chunk modification messages. Then, all MPI ranks
+ *              enter a loop that alternates between non-blocking
+ *              MPI_Iprobe calls to probe for incoming messages and
+ *              MPI_Testall calls to see if all send requests have
+ *              completed. As chunk modification messages arrive,
+ *              non-blocking MPI_Irecv calls will be posted for each
+ *              message.
+ *
+ *              Once all send requests have completed, an MPI_Ibarrier is
+ *              posted and the loop then alternates between MPI_Iprobe
+ *              calls and MPI_Test calls to check if all ranks have reached
+ *              the non-blocking barrier. Once all ranks have reached the
+ *              barrier, processing can move on to updating the selected
+ *              chunks that are owned in the operation.
+ *
+ *              Any chunk messages that were received from other ranks
+ *              will be returned through the `chunk_msg_bufs` array and
+ *              `chunk_msg_bufs_len` will be set appropriately.
+ *
+ *              NOTE: The use of non-blocking sends and receives of chunk
+ *                    data here may contribute to large amounts of memory
+ *                    usage/MPI request overhead if the number of shared
+ *                    chunks is high. If this becomes a problem, it may be
+ *                    useful to split the message receiving loop away so
+ *                    that chunk modification messages can be received and
+ *                    processed immediately (MPI_Recv) using a single chunk
+ *                    message buffer. However, it's possible this may
+ *                    degrade performance since the chunk message sends
+ *                    are synchronous (MPI_Issend) in the Nonblocking
+ *                    Consensus algorithm.
+ *
+ * Return:      Non-negative on success/Negative on failure
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5D__mpio_share_chunk_modification_data(H5D_filtered_collective_io_info_t *chunk_list,
+                                        size_t *chunk_list_num_entries, H5D_io_info_t *io_info,
+                                        const H5D_type_info_t *type_info, int mpi_rank, int mpi_size,
+                                        H5D_filtered_collective_io_info_t **chunk_hash_table,
+                                        unsigned char ***chunk_msg_bufs, int *chunk_msg_bufs_len)
+{
+#if MPI_VERSION >= 3
+    H5D_filtered_collective_io_info_t *chunk_table       = NULL;
+    H5S_sel_iter_t *                   mem_iter          = NULL;
+    unsigned char **                   msg_send_bufs     = NULL;
+    unsigned char **                   msg_recv_bufs     = NULL;
+    MPI_Request *                      send_requests     = NULL;
+    MPI_Request *                      recv_requests     = NULL;
+    MPI_Request                        ibarrier          = MPI_REQUEST_NULL;
+    hbool_t                            mem_iter_init     = FALSE;
+    hbool_t                            ibarrier_posted   = FALSE;
+    size_t                             send_bufs_nalloc  = 0;
+    size_t                             num_send_requests = 0;
+    size_t                             num_recv_requests = 0;
+    size_t                             num_msgs_incoming = 0;
+    size_t                             last_assigned_idx;
+    size_t                             i;
+    int                                mpi_code;
+    herr_t                             ret_value = SUCCEED;
+
+    FUNC_ENTER_STATIC
+
+    HDassert(chunk_list_num_entries);
+    HDassert(chunk_list || 0 == *chunk_list_num_entries);
+    HDassert(io_info);
+    HDassert(type_info);
+    HDassert(mpi_size > 1);
+    HDassert(chunk_msg_bufs);
+    HDassert(chunk_msg_bufs_len);
+
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TRACE_ENTER(mpi_rank);
+    H5D_MPIO_TIME_START(mpi_rank, "Share chunk modification data");
+#endif
+
+    /* Set to latest format for encoding dataspace */
+    H5CX_set_libver_bounds(NULL);
+
+    if (*chunk_list_num_entries) {
+        /* Allocate a selection iterator for iterating over chunk dataspaces */
+        if (NULL == (mem_iter = H5FL_MALLOC(H5S_sel_iter_t)))
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate dataspace selection iterator")
+
+        /*
+         * Allocate send buffer and MPI_Request arrays for non-blocking
+         * sends of outgoing chunk messages
+         */
+        send_bufs_nalloc = H5D_CHUNK_NUM_SEND_MSGS_INIT;
+        if (NULL == (msg_send_bufs = H5MM_malloc(send_bufs_nalloc * sizeof(*msg_send_bufs))))
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL,
+                        "couldn't allocate chunk modification message buffer array")
+
+        if (NULL == (send_requests = H5MM_malloc(send_bufs_nalloc * sizeof(*send_requests))))
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate send requests array")
+    }
+
+    /*
+     * For each chunk this rank owns, add to the total number of
+     * incoming MPI messages, then update the local chunk list to
+     * overwrite any previous chunks no longer owned by this rank.
+     * Since the data for those chunks will have already been sent,
+     * this rank should no longer be interested in them and they
+     * can effectively be discarded. This bookkeeping also makes
+     * the code for the collective file space re-allocation and
+     * chunk re-insertion operations a bit simpler.
+     *
+     * For each chunk this rank doesn't own, use non-blocking
+     * synchronous sends to send the data this rank is writing to
+     * the rank that does own the chunk.
+     */
+    for (i = 0, last_assigned_idx = 0; i < *chunk_list_num_entries; i++) {
+        H5D_filtered_collective_io_info_t *chunk_entry = &chunk_list[i];
+
+        if (mpi_rank == chunk_entry->new_owner) {
+            num_msgs_incoming += (size_t)(chunk_entry->num_writers - 1);
+
+            /*
+             * Overwrite chunk entries this rank doesn't own with entries that it
+             * does own, since it has sent the necessary data and is no longer
+             * interested in the chunks it doesn't own.
+             */
+            chunk_list[last_assigned_idx] = chunk_list[i];
+
+            /*
+             * Since, at large scale, a chunk's index value may be larger than
+             * the maximum value that can be stored in an int, we cannot rely
+             * on using a chunk's index value as the tag for the MPI messages
+             * sent/received for a chunk. Therefore, add this chunk to a hash
+             * table with the chunk's index as a key so that we can quickly find
+             * the chunk when processing chunk messages that were received. The
+             * message itself will contain the chunk's index so we can update
+             * the correct chunk with the received data.
+             */
+            HASH_ADD(hh, chunk_table, index_info.chunk_idx, sizeof(hsize_t), &chunk_list[last_assigned_idx]);
+
+            last_assigned_idx++;
+        }
+        else {
+            H5D_chunk_info_t *chunk_info = chunk_entry->chunk_info;
+            unsigned char *   mod_data_p = NULL;
+            hsize_t           iter_nelmts;
+            size_t            mod_data_size = 0;
+            size_t            space_size    = 0;
+
+            /* Add the size of the chunk index to the encoded size */
+            mod_data_size += sizeof(hsize_t);
+
+            /* Determine size of serialized chunk file dataspace */
+            if (H5S_encode(chunk_info->fspace, &mod_data_p, &space_size) < 0)
+                HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "unable to get encoded dataspace size")
+            mod_data_size += space_size;
+
+            /* Determine size of data being written */
+            iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->mspace);
+            H5_CHECK_OVERFLOW(iter_nelmts, hsize_t, size_t);
+
+            mod_data_size += (size_t)iter_nelmts * type_info->src_type_size;
+
+            if (NULL == (msg_send_bufs[num_send_requests] = H5MM_malloc(mod_data_size)))
+                HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL,
+                            "couldn't allocate chunk modification message buffer")
+
+            mod_data_p = msg_send_bufs[num_send_requests];
+
+            /* Store the chunk's index into the buffer */
+            HDmemcpy(mod_data_p, &chunk_entry->index_info.chunk_idx, sizeof(hsize_t));
+            mod_data_p += sizeof(hsize_t);
+
+            /* Serialize the chunk's file dataspace into the buffer */
+            if (H5S_encode(chunk_info->fspace, &mod_data_p, &mod_data_size) < 0)
+                HGOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, FAIL, "unable to encode dataspace")
+
+            /* Initialize iterator for memory selection */
+            if (H5S_select_iter_init(mem_iter, chunk_info->mspace, type_info->src_type_size,
+                                     H5S_SEL_ITER_SHARE_WITH_DATASPACE) < 0)
+                HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL,
+                            "unable to initialize memory selection information")
+            mem_iter_init = TRUE;
+
+            /* Collect the modification data into the buffer */
+            if (0 == H5D__gather_mem(io_info->u.wbuf, mem_iter, (size_t)iter_nelmts, mod_data_p))
+                HGOTO_ERROR(H5E_IO, H5E_CANTGATHER, FAIL, "couldn't gather from write buffer")
+
+            /*
+             * Ensure that the size of the chunk data being sent can be
+             * safely cast to an int for MPI. Note that this should
+             * generally be OK for now (unless a rank is sending a
+             * whole 32-bit-sized chunk of data + its encoded selection),
+             * but if we allow larger than 32-bit-sized chunks in the
+             * future, this may become a problem and derived datatypes
+             * will need to be used.
+             */
+            H5_CHECK_OVERFLOW(mod_data_size, size_t, int)
+
+            /* Send modification data to new owner */
+            if (MPI_SUCCESS !=
+                (mpi_code = MPI_Issend(msg_send_bufs[num_send_requests], (int)mod_data_size, MPI_BYTE,
+                                       chunk_entry->new_owner, H5D_CHUNK_MOD_DATA_TAG, io_info->comm,
+                                       &send_requests[num_send_requests])))
+                HMPI_GOTO_ERROR(FAIL, "MPI_Issend failed", mpi_code)
+
+            num_send_requests++;
+
+            /* Resize send buffer and send request arrays if necessary */
+            if (num_send_requests == send_bufs_nalloc) {
+                void *tmp_alloc;
+
+                send_bufs_nalloc = (size_t)((double)send_bufs_nalloc * 1.5);
+
+                if (NULL ==
+                    (tmp_alloc = H5MM_realloc(msg_send_bufs, send_bufs_nalloc * sizeof(*msg_send_bufs))))
+                    HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL,
+                                "couldn't resize chunk modification message buffer array")
+                msg_send_bufs = tmp_alloc;
+
+                if (NULL ==
+                    (tmp_alloc = H5MM_realloc(send_requests, send_bufs_nalloc * sizeof(*send_requests))))
+                    HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't resize send requests array")
+                send_requests = tmp_alloc;
+            }
+
+            if (H5S_SELECT_ITER_RELEASE(mem_iter) < 0)
+                HGOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release memory selection iterator")
+            mem_iter_init = FALSE;
+        }
+    }
+
+    /* Check if the number of send or receive requests will overflow an int (MPI requirement) */
+    if (num_send_requests > INT_MAX || num_msgs_incoming > INT_MAX)
+        HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL,
+                    "too many shared chunks in parallel filtered write operation")
+
+    H5_CHECK_OVERFLOW(num_send_requests, size_t, int)
+    H5_CHECK_OVERFLOW(num_msgs_incoming, size_t, int)
+
+    /*
+     * Allocate receive buffer and MPI_Request arrays for non-blocking
+     * receives of incoming chunk messages
+     */
+    if (num_msgs_incoming) {
+        if (NULL == (msg_recv_bufs = H5MM_malloc(num_msgs_incoming * sizeof(*msg_recv_bufs))))
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL,
+                        "couldn't allocate chunk modification message buffer array")
+
+        if (NULL == (recv_requests = H5MM_malloc(num_msgs_incoming * sizeof(*recv_requests))))
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate receive requests array")
+    }
+
+    /* Process any incoming messages until everyone is done */
+    do {
+        MPI_Status status;
+        int        msg_flag;
+
+        /* Probe for an incoming message from any rank */
+        if (MPI_SUCCESS != (mpi_code = MPI_Iprobe(MPI_ANY_SOURCE, H5D_CHUNK_MOD_DATA_TAG, io_info->comm,
+                                                  &msg_flag, &status)))
+            HMPI_GOTO_ERROR(FAIL, "MPI_Iprobe failed", mpi_code)
+
+        /*
+         * If a message was found, allocate a buffer for the message and
+         * post a non-blocking receive to receive it
+         */
+        if (msg_flag) {
+#if MPI_VERSION >= 3
+            MPI_Count msg_size = 0;
+
+            if (MPI_SUCCESS != (mpi_code = MPI_Get_elements_x(&status, MPI_BYTE, &msg_size)))
+                HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements_x failed", mpi_code)
+
+            H5_CHECK_OVERFLOW(msg_size, MPI_Count, int)
+#else
+            int msg_size = 0;
+
+            if (MPI_SUCCESS != (mpi_code = MPI_Get_elements(&status, MPI_BYTE, &msg_size)))
+                HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code)
+#endif
+
+            if (msg_size <= 0)
+                HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "invalid chunk modification message size")
+
+            HDassert((num_recv_requests + 1) <= num_msgs_incoming);
+            if (NULL ==
+                (msg_recv_bufs[num_recv_requests] = H5MM_malloc((size_t)msg_size * sizeof(unsigned char))))
+                HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL,
+                            "couldn't allocate chunk modification message receive buffer")
+
+            if (MPI_SUCCESS != (mpi_code = MPI_Irecv(msg_recv_bufs[num_recv_requests], (int)msg_size,
+                                                     MPI_BYTE, status.MPI_SOURCE, H5D_CHUNK_MOD_DATA_TAG,
+                                                     io_info->comm, &recv_requests[num_recv_requests])))
+                HMPI_GOTO_ERROR(FAIL, "MPI_Irecv failed", mpi_code)
+
+            num_recv_requests++;
+        }
+
+        if (ibarrier_posted) {
+            int ibarrier_completed;
+
+            if (MPI_SUCCESS != (mpi_code = MPI_Test(&ibarrier, &ibarrier_completed, MPI_STATUS_IGNORE)))
+                HMPI_GOTO_ERROR(FAIL, "MPI_Test failed", mpi_code)
+
+            if (ibarrier_completed)
+                break;
+        }
+        else {
+            int all_sends_completed;
+
+            /* Determine if all send requests have completed */
+            if (MPI_SUCCESS != (mpi_code = MPI_Testall((int)num_send_requests, send_requests,
+                                                       &all_sends_completed, MPI_STATUSES_IGNORE)))
+                HMPI_GOTO_ERROR(FAIL, "MPI_Testall failed", mpi_code)
+
+            if (all_sends_completed) {
+                /* Post non-blocking barrier */
+                if (MPI_SUCCESS != (mpi_code = MPI_Ibarrier(io_info->comm, &ibarrier)))
+                    HMPI_GOTO_ERROR(FAIL, "MPI_Ibarrier failed", mpi_code)
+                ibarrier_posted = TRUE;
+
+                /*
+                 * Now that all send requests have completed, free up the
+                 * send buffers used in the non-blocking operations
+                 */
+                if (msg_send_bufs) {
+                    for (i = 0; i < num_send_requests; i++) {
+                        if (msg_send_bufs[i])
+                            H5MM_free(msg_send_bufs[i]);
+                    }
+
+                    msg_send_bufs = H5MM_xfree(msg_send_bufs);
+                }
+            }
+        }
+    } while (1);
+
+    /*
+     * Ensure all receive requests have completed before moving on.
+     * For linked-chunk I/O, more overlap with computation could
+     * theoretically be achieved by returning the receive requests
+     * array and postponing this wait until during chunk updating
+     * when the data is really needed. However, multi-chunk I/O
+     * only updates a chunk at a time and the messages may not come
+     * in the order that chunks are processed. So, the safest way to
+     * support both I/O modes is to simply make sure all messages
+     * are available.
+     */
+    if (MPI_SUCCESS != (mpi_code = MPI_Waitall((int)num_recv_requests, recv_requests, MPI_STATUSES_IGNORE)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Waitall failed", mpi_code)
+
+    /* Set the new number of locally-selected chunks */
+    *chunk_list_num_entries = last_assigned_idx;
+
+    /* Return chunk message buffers if any were received */
+    *chunk_hash_table   = chunk_table;
+    *chunk_msg_bufs     = msg_recv_bufs;
+    *chunk_msg_bufs_len = (int)num_recv_requests;
+
+done:
+    if (ret_value < 0) {
+        /* If this rank failed, make sure to participate in collective barrier (no goto after done:) */
+        if (!ibarrier_posted) {
+            if (MPI_SUCCESS != (mpi_code = MPI_Ibarrier(io_info->comm, &ibarrier)))
+                HMPI_DONE_ERROR(FAIL, "MPI_Ibarrier failed", mpi_code)
+        }
+
+        /*
+         * Cancel only outstanding requests: completed ones may have been set
+         * to MPI_REQUEST_NULL, which must not be passed to MPI_Cancel.
+         */
+        for (i = 0; send_requests && i < num_send_requests; i++)
+            if (MPI_REQUEST_NULL != send_requests[i])
+                MPI_Cancel(&send_requests[i]);
+
+        for (i = 0; recv_requests && i < num_recv_requests; i++)
+            if (MPI_REQUEST_NULL != recv_requests[i])
+                MPI_Cancel(&recv_requests[i]);
+
+        if (msg_recv_bufs) {
+            for (i = 0; i < num_recv_requests; i++) {
+                H5MM_free(msg_recv_bufs[i]);
+            }
+
+            H5MM_free(msg_recv_bufs);
+        }
+
+        HASH_CLEAR(hh, chunk_table);
+    }
+
+    if (recv_requests)
+        H5MM_free(recv_requests);
+    if (send_requests)
+        H5MM_free(send_requests);
+
+    if (msg_send_bufs) {
+        for (i = 0; i < num_send_requests; i++) {
+            if (msg_send_bufs[i])
+                H5MM_free(msg_send_bufs[i]);
+        }
+
+        H5MM_free(msg_send_bufs);
+    }
+
+    if (mem_iter) {
+        if (mem_iter_init && H5S_SELECT_ITER_RELEASE(mem_iter) < 0)
+            HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release dataspace selection iterator")
+        mem_iter = H5FL_FREE(H5S_sel_iter_t, mem_iter);
+    }
+
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TIME_STOP(mpi_rank);
+    H5D_MPIO_TRACE_EXIT(mpi_rank);
+#endif
+
+    FUNC_LEAVE_NOAPI(ret_value)
+#else
+    FUNC_ENTER_STATIC
+    HERROR(
+        H5E_DATASET, H5E_WRITEERROR,
+        "unable to send chunk modification data between MPI ranks - MPI version < 3 (MPI_Ibarrier missing)")
+    FUNC_LEAVE_NOAPI(FAIL)
+#endif
+} /* end H5D__mpio_share_chunk_modification_data() */
+
+/*-------------------------------------------------------------------------
+ * Function:    H5D__mpio_collective_filtered_chunk_common_io
+ *
+ * Purpose:     This routine performs the common part of collective I/O
+ *              when reading or writing filtered chunks collectively.
+ *
+ * Return:      Non-negative on success/Negative on failure
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5D__mpio_collective_filtered_chunk_common_io(H5D_filtered_collective_io_info_t *chunk_list,
+                                              size_t chunk_list_num_entries, const H5D_io_info_t *io_info,
+                                              const H5D_type_info_t *type_info, int mpi_size)
+{
+    H5D_io_info_t local_io_info; /* Modifiable copy of the caller's I/O info */
+    H5D_storage_t contig_store;  /* Storage info handed to the final I/O call */
+    MPI_Datatype  mtype           = MPI_DATATYPE_NULL; /* MPI type for the memory side */
+    MPI_Datatype  ftype           = MPI_DATATYPE_NULL; /* MPI type for the file side */
+    hbool_t       mtype_derived   = FALSE;
+    hbool_t       ftype_derived   = FALSE;
+    hbool_t       is_read;
+    hbool_t       is_write;
+    haddr_t       first_read_addr = HADDR_UNDEF;
+    hsize_t       buf_count;
+    size_t        nchunks_io;
+    size_t        idx;
+    char          placeholder_buf; /* Stand-in buffer for ranks with no chunks, thus a NULL buf pointer */
+    int           mpi_code;
+    herr_t        ret_value = SUCCEED;
+
+    FUNC_ENTER_STATIC
+
+    HDassert(chunk_list || 0 == chunk_list_num_entries);
+    HDassert(io_info);
+    HDassert(type_info);
+
+    /*
+     * Work on a private copy of the I/O info so that the buffer pointer and
+     * storage info can be overridden; also note the transfer direction.
+     */
+    local_io_info = *io_info;
+    is_read       = (H5D_IO_OP_READ == io_info->op_type);
+    is_write      = (H5D_IO_OP_WRITE == io_info->op_type);
+
+    /* Build the MPI datatypes used for the memory and file sides of the chunk I/O */
+    if (H5D__mpio_collective_filtered_io_type(chunk_list, chunk_list_num_entries, io_info->op_type, &mtype,
+                                              &mtype_derived, &ftype, &ftype_derived) < 0)
+        HGOTO_ERROR(H5E_DATASET, H5E_BADTYPE, FAIL, "couldn't create MPI I/O type for chunk I/O")
+
+    /*
+     * Count the chunks taking part in the transfer. A write involves every
+     * entry in the list. A read involves only the entries flagged
+     * `need_read` (for a read done on behalf of a write, chunks that will be
+     * fully overwritten may not need reading); the first such entry
+     * supplies the base file address for the read.
+     */
+    nchunks_io = is_read ? 0 : chunk_list_num_entries;
+    if (is_read) {
+        for (idx = 0; idx < chunk_list_num_entries; idx++) {
+            HDassert(chunk_list[idx].buf);
+
+            if (!chunk_list[idx].need_read)
+                continue;
+
+            if (!H5F_addr_defined(first_read_addr))
+                first_read_addr = chunk_list[idx].chunk_current.offset;
+
+            nchunks_io++;
+        }
+    }
+
+    /*
+     * A rank with nothing to transfer can skip the I/O altogether when the
+     * MPI communicator size is 1 or independent I/O was requested at the
+     * low level. Otherwise it still has to take part in the collective
+     * I/O, but it probably has a NULL buf pointer, so point the I/O info
+     * at a placeholder buffer, since the write/read function expects one.
+     */
+    if (0 == nchunks_io) {
+        H5FD_mpio_collective_opt_t opt_mode;
+
+        /* Fetch the collective_opt property to see whether the application asked for individual I/O */
+        if (H5CX_get_mpio_coll_opt(&opt_mode) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get MPI-I/O collective_opt property")
+
+        if ((1 == mpi_size) || (H5FD_MPIO_INDIVIDUAL_IO == opt_mode))
+            HGOTO_DONE(SUCCEED)
+
+        if (is_write)
+            local_io_info.u.wbuf = &placeholder_buf;
+        else
+            local_io_info.u.rbuf = &placeholder_buf;
+    }
+
+    /*
+     * Fill in the storage info for the operation. With chunks to transfer,
+     * the base storage address is the first chunk's file address (its new
+     * address for a write, the first chunk needing a read otherwise) and
+     * the buffer count is 1; with none, both the address and the count
+     * are zero.
+     */
+    if (0 == nchunks_io) {
+        buf_count                     = 0;
+        contig_store.contig.dset_addr = 0;
+    }
+    else {
+        buf_count                     = 1;
+        contig_store.contig.dset_addr = is_write ? chunk_list[0].chunk_new.offset : first_read_addr;
+    }
+
+    contig_store.contig.dset_size = (hsize_t)io_info->dset->shared->layout.u.chunk.size;
+    local_io_info.store           = &contig_store;
+
+    /* Carry out the I/O with the types and storage info set up above */
+    if (H5D__final_collective_io(&local_io_info, type_info, buf_count, ftype, mtype) < 0)
+        HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "couldn't finish MPI I/O")
+
+done:
+    /* Release the MPI memory and file types, but only those that were derived */
+    if (mtype_derived) {
+        if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&mtype)))
+            HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+    }
+    if (ftype_derived) {
+        if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&ftype)))
+            HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+    }
+
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5D__mpio_collective_filtered_chunk_common_io() */
+
+/*-------------------------------------------------------------------------
+ * Function:    H5D__mpio_collective_filtered_chunk_read
+ *
+ * Purpose:     This routine coordinates a collective read across all ranks
+ *              of the chunks they have selected. Each rank will then go
+ *              and unfilter its chunks and scatter the data to its read buffer.
+ *
+ * Return:      Non-negative on success/Negative on failure
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5D__mpio_collective_filtered_chunk_read(H5D_filtered_collective_io_info_t *chunk_list,
+                                         size_t chunk_list_num_entries, const H5D_io_info_t *io_info,
+                                         const H5D_type_info_t *type_info, int mpi_rank, int mpi_size)
+{
+    H5D_fill_buf_info_t fb_info; /* Fill value buffer info; initialized lazily, see fb_info_init */
+    H5D_chunk_info_t *  chunk_info = NULL; /* chunk_info of the chunk list entry being scattered */
+    H5D_io_info_t       coll_io_info; /* Local copy of io_info handed to the collective read */
+    H5Z_EDC_t           err_detect; /* Error detection info */
+    H5Z_cb_t            filter_cb;  /* I/O filter callback function */
+    hsize_t             file_chunk_size = 0; /* layout.u.chunk.size; stays 0 when the list is empty */
+    hsize_t             iter_nelmts; /* Number of points to iterate over for the chunk IO operation */
+    hbool_t             should_fill  = FALSE; /* Whether unallocated chunks' buffers get the fill value */
+    hbool_t             fb_info_init = FALSE; /* TRUE once H5D__fill_init succeeded on fb_info */
+    hbool_t             index_empty  = FALSE; /* Output of H5D__chunk_index_empty (incr. alloc only) */
+    size_t              i;
+    H5S_t *             fill_space    = NULL; /* Chunk-dimensioned dataspace passed to H5D__fill */
+    void *              base_read_buf = NULL; /* buf of the first entry with a defined file address */
+    herr_t              ret_value     = SUCCEED;
+
+    FUNC_ENTER_STATIC
+
+    HDassert(chunk_list || 0 == chunk_list_num_entries);
+    HDassert(io_info);
+    HDassert(type_info);
+
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TRACE_ENTER(mpi_rank);
+    H5D_MPIO_TIME_START(mpi_rank, "Filtered collective chunk read");
+#else
+    (void)mpi_rank; /* mpi_rank is only referenced by the debug macros in this function */
+#endif
+
+    /* Initialize temporary I/O info */
+    coll_io_info        = *io_info;
+    coll_io_info.u.rbuf = NULL; /* pointed at base_read_buf below, if one was set */
+
+    if (chunk_list_num_entries) {
+        /* Retrieve filter settings from API context */
+        if (H5CX_get_err_detect(&err_detect) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get error detection info")
+        if (H5CX_get_filter_cb(&filter_cb) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get I/O filter callback function")
+
+        /* Set size of full chunks in dataset */
+        file_chunk_size = io_info->dset->shared->layout.u.chunk.size;
+
+        /* Determine if fill values should be "read" for unallocated chunks */
+        should_fill = (io_info->dset->shared->dcpl_cache.fill.fill_time == H5D_FILL_TIME_ALLOC) ||
+                      ((io_info->dset->shared->dcpl_cache.fill.fill_time == H5D_FILL_TIME_IFSET) &&
+                       io_info->dset->shared->dcpl_cache.fill.fill_defined);
+    }
+
+    /*
+     * Allocate memory buffers for all chunks being read. Chunk data buffers are of
+     * the largest size between the chunk's current filtered size and the chunk's true
+     * size, as calculated by the number of elements in the chunk's file space extent
+     * multiplied by the datatype size. This tries to ensure that:
+     *
+     *  * If we're reading the chunk and the filter normally reduces the chunk size,
+     *    the unfiltering operation won't need to grow the buffer.
+     *  * If we're reading the chunk and the filter normally grows the chunk size,
+     *    we make sure to read into a buffer of size equal to the filtered chunk's
+     *    size; reading into a (smaller) buffer of size equal to the unfiltered
+     *    chunk size would of course be bad.
+     */
+    for (i = 0; i < chunk_list_num_entries; i++) {
+        HDassert(chunk_list[i].need_read);
+
+        chunk_list[i].chunk_buf_size = MAX(chunk_list[i].chunk_current.length, file_chunk_size);
+
+        if (NULL == (chunk_list[i].buf = H5MM_malloc(chunk_list[i].chunk_buf_size))) {
+            /* Push an error, but participate in collective read */
+            HDONE_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk data buffer")
+            break; /* NOTE(review): entries from i on get no buffer here; confirm later loops cope */
+        }
+
+        /*
+         * Check if chunk is currently allocated. If not, don't try to
+         * read it from the file. Instead, just fill the chunk buffer
+         * with the fill value if necessary.
+         */
+        if (H5F_addr_defined(chunk_list[i].chunk_current.offset)) {
+            /* Set first read buffer */
+            if (!base_read_buf)
+                base_read_buf = chunk_list[i].buf;
+
+            /* Set chunk's new length for eventual filter pipeline calls */
+            if (chunk_list[i].skip_filter_pline)
+                chunk_list[i].chunk_new.length = file_chunk_size;
+            else
+                chunk_list[i].chunk_new.length = chunk_list[i].chunk_current.length;
+        }
+        else {
+            chunk_list[i].need_read = FALSE; /* also makes the unfilter step below skip this entry */
+
+            /* Set chunk's new length for eventual filter pipeline calls */
+            chunk_list[i].chunk_new.length = file_chunk_size;
+
+            if (should_fill) {
+                /* Initialize fill value buffer if not already initialized */
+                if (!fb_info_init) {
+                    hsize_t chunk_dims[H5S_MAX_RANK];
+
+                    HDassert(io_info->dset->shared->ndims == io_info->dset->shared->layout.u.chunk.ndims - 1);
+                    for (size_t j = 0; j < io_info->dset->shared->layout.u.chunk.ndims - 1; j++)
+                        chunk_dims[j] = (hsize_t)io_info->dset->shared->layout.u.chunk.dim[j];
+
+                    /* Get a dataspace for filling chunk memory buffers */
+                    if (NULL == (fill_space = H5S_create_simple(
+                                     io_info->dset->shared->layout.u.chunk.ndims - 1, chunk_dims, NULL)))
+                        HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to create chunk fill dataspace")
+
+                    /* Initialize fill value buffer */
+                    if (H5D__fill_init(&fb_info, NULL, (H5MM_allocate_t)H5D__chunk_mem_alloc,
+                                       (void *)&io_info->dset->shared->dcpl_cache.pline,
+                                       (H5MM_free_t)H5D__chunk_mem_free,
+                                       (void *)&io_info->dset->shared->dcpl_cache.pline,
+                                       &io_info->dset->shared->dcpl_cache.fill, io_info->dset->shared->type,
+                                       io_info->dset->shared->type_id, 0, file_chunk_size) < 0)
+                        HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't initialize fill value buffer")
+
+                    fb_info_init = TRUE;
+                }
+
+                /* Write fill value to memory buffer */
+                HDassert(fb_info.fill_buf);
+                if (H5D__fill(fb_info.fill_buf, io_info->dset->shared->type, chunk_list[i].buf,
+                              type_info->mem_type, fill_space) < 0)
+                    HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "couldn't fill chunk buffer with fill value")
+            }
+        }
+    }
+
+    /*
+     * If dataset is incrementally allocated and hasn't been written to
+     * yet, the chunk index should be empty. In this case, a collective
+     * read of chunks is essentially a no-op, so avoid it here.
+     */
+    index_empty = FALSE;
+    if (io_info->dset->shared->dcpl_cache.fill.alloc_time == H5D_ALLOC_TIME_INCR)
+        if (H5D__chunk_index_empty(io_info->dset, &index_empty) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "couldn't determine if chunk index is empty")
+
+    if (!index_empty) {
+        /*
+         * Override the read buffer to point to the address of
+         * the first chunk data buffer being read into
+         */
+        if (base_read_buf)
+            coll_io_info.u.rbuf = base_read_buf;
+
+        /* Perform collective chunk read */
+        if (H5D__mpio_collective_filtered_chunk_common_io(chunk_list, chunk_list_num_entries, &coll_io_info,
+                                                          type_info, mpi_size) < 0)
+            HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "couldn't finish collective filtered chunk read")
+    }
+
+    /*
+     * Iterate through all the read chunks, unfiltering them and scattering their
+     * data out to the application's read buffer.
+     */
+    for (i = 0; i < chunk_list_num_entries; i++) {
+        chunk_info = chunk_list[i].chunk_info;
+
+        /* Unfilter the chunk, unless we didn't read it from the file */
+        if (chunk_list[i].need_read && !chunk_list[i].skip_filter_pline) {
+            if (H5Z_pipeline(&io_info->dset->shared->dcpl_cache.pline, H5Z_FLAG_REVERSE,
+                             &(chunk_list[i].index_info.filter_mask), err_detect, filter_cb,
+                             (size_t *)&chunk_list[i].chunk_new.length, &chunk_list[i].chunk_buf_size,
+                             &chunk_list[i].buf) < 0)
+                HGOTO_ERROR(H5E_DATASET, H5E_CANTFILTER, FAIL, "couldn't unfilter chunk for modifying")
+        }
+
+        /* Scatter the chunk data to the read buffer */
+        iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->fspace);
+
+        if (H5D_select_io_mem(io_info->u.rbuf, chunk_info->mspace, chunk_list[i].buf, chunk_info->fspace,
+                              type_info->src_type_size, (size_t)iter_nelmts) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't copy chunk data to read buffer")
+    }
+
+done:
+    /* Free the chunk data buffers allocated above; this runs on success and on failure */
+    for (i = 0; i < chunk_list_num_entries; i++) {
+        if (chunk_list[i].buf) {
+            H5MM_free(chunk_list[i].buf);
+            chunk_list[i].buf = NULL;
+        }
+    }
+
+    /* Release the fill buffer info, if it's been initialized */
+    if (fb_info_init && H5D__fill_term(&fb_info) < 0)
+        HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "Can't release fill buffer info")
+    if (fill_space && (H5S_close(fill_space) < 0))
+        HDONE_ERROR(H5E_DATASET, H5E_CLOSEERROR, FAIL, "can't close fill space")
+
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TIME_STOP(mpi_rank);
+    H5D_MPIO_TRACE_EXIT(mpi_rank);
+#endif
+
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5D__mpio_collective_filtered_chunk_read() */
+
+/*-------------------------------------------------------------------------
+ * Function:    H5D__mpio_collective_filtered_chunk_update
+ *
+ * Purpose:     When performing a parallel write on a chunked dataset with
+ *              filters applied, all ranks must update their owned chunks
+ *              with their own modification data and data from other ranks.
+ *              This routine is responsible for coordinating that process.
+ *
+ * Return:      Non-negative on success/Negative on failure
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5D__mpio_collective_filtered_chunk_update(H5D_filtered_collective_io_info_t *chunk_list,
+                                           size_t                             chunk_list_num_entries,
+                                           H5D_filtered_collective_io_info_t *chunk_hash_table,
+                                           unsigned char **chunk_msg_bufs, int chunk_msg_bufs_len,
+                                           const H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
+                                           int mpi_rank, int mpi_size)
+{
+    H5D_fill_buf_info_t fb_info; /* Fill value buffer info; initialized lazily, see fb_info_init */
+    H5D_chunk_info_t *  chunk_info = NULL; /* chunk_info of the chunk list entry being updated */
+    H5S_sel_iter_t *    sel_iter   = NULL; /* Dataspace selection iterator for H5D__scatter_mem */
+    H5D_io_info_t       coll_io_info; /* Copy of io_info with op_type forced to read (set below) */
+    H5Z_EDC_t           err_detect; /* Error detection info */
+    H5Z_cb_t            filter_cb;  /* I/O filter callback function */
+    hsize_t             file_chunk_size = 0; /* layout.u.chunk.size; stays 0 when the list is empty */
+    hsize_t             iter_nelmts; /* Number of points to iterate over for the chunk IO operation */
+    hbool_t             should_fill   = FALSE; /* Whether unallocated chunks' buffers get the fill value */
+    hbool_t             fb_info_init  = FALSE; /* TRUE once H5D__fill_init succeeded on fb_info */
+    hbool_t             sel_iter_init = FALSE; /* TRUE while sel_iter still needs to be released */
+    hbool_t             index_empty   = FALSE; /* Output of H5D__chunk_index_empty (incr. alloc only) */
+    size_t              i;
+    H5S_t *             dataspace     = NULL; /* Dataspace decoded from the current chunk message */
+    H5S_t *             fill_space    = NULL; /* Chunk-dimensioned dataspace passed to H5D__fill */
+    void *              base_read_buf = NULL; /* buf of the first entry that is read from the file */
+    herr_t              ret_value     = SUCCEED;
+
+    FUNC_ENTER_STATIC
+
+    HDassert(chunk_list || 0 == chunk_list_num_entries);
+    HDassert((chunk_msg_bufs && chunk_hash_table) || 0 == chunk_msg_bufs_len);
+    HDassert(io_info);
+    HDassert(type_info);
+
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TRACE_ENTER(mpi_rank);
+    H5D_MPIO_TIME_START(mpi_rank, "Filtered collective chunk update");
+#endif
+
+    if (chunk_list_num_entries) {
+        /* Retrieve filter settings from API context */
+        if (H5CX_get_err_detect(&err_detect) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get error detection info")
+        if (H5CX_get_filter_cb(&filter_cb) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get I/O filter callback function")
+
+        /* Set size of full chunks in dataset */
+        file_chunk_size = io_info->dset->shared->layout.u.chunk.size;
+
+        /* Determine if fill values should be written to chunks */
+        should_fill = (io_info->dset->shared->dcpl_cache.fill.fill_time == H5D_FILL_TIME_ALLOC) ||
+                      ((io_info->dset->shared->dcpl_cache.fill.fill_time == H5D_FILL_TIME_IFSET) &&
+                       io_info->dset->shared->dcpl_cache.fill.fill_defined);
+    }
+
+    /*
+     * Allocate memory buffers for all owned chunks. Chunk data buffers are of the
+     * largest size between the chunk's current filtered size and the chunk's true
+     * size, as calculated by the number of elements in the chunk's file space extent
+     * multiplied by the datatype size. This tries to ensure that:
+     *
+     *  * If we're fully overwriting the chunk and the filter normally reduces the
+     *    chunk size, we simply have the exact buffer size required to hold the
+     *    unfiltered chunk data.
+     *  * If we're fully overwriting the chunk and the filter normally grows the
+     *    chunk size (e.g., fletcher32 filter), the final filtering operation
+     *    (hopefully) won't need to grow the buffer.
+     *  * If we're reading the chunk and the filter normally reduces the chunk size,
+     *    the unfiltering operation won't need to grow the buffer.
+     *  * If we're reading the chunk and the filter normally grows the chunk size,
+     *    we make sure to read into a buffer of size equal to the filtered chunk's
+     *    size; reading into a (smaller) buffer of size equal to the unfiltered
+     *    chunk size would of course be bad.
+     */
+    for (i = 0; i < chunk_list_num_entries; i++) {
+        HDassert(mpi_rank == chunk_list[i].new_owner);
+
+        chunk_list[i].chunk_buf_size = MAX(chunk_list[i].chunk_current.length, file_chunk_size);
+
+        /*
+         * If this chunk hasn't been allocated yet and we aren't writing
+         * out fill values to it, make sure to 0-fill its memory buffer
+         * so we don't use uninitialized memory.
+         */
+        if (!H5F_addr_defined(chunk_list[i].chunk_current.offset) && !should_fill)
+            chunk_list[i].buf = H5MM_calloc(chunk_list[i].chunk_buf_size);
+        else
+            chunk_list[i].buf = H5MM_malloc(chunk_list[i].chunk_buf_size);
+
+        if (NULL == chunk_list[i].buf) {
+            /* Push an error, but participate in collective read */
+            HDONE_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk data buffer")
+            break; /* NOTE(review): entries from i on get no buffer here; confirm later loops cope */
+        }
+
+        /* Set chunk's new length for eventual filter pipeline calls */
+        if (chunk_list[i].need_read) {
+            /*
+             * Check if chunk is currently allocated. If not, don't try to
+             * read it from the file. Instead, just fill the chunk buffer
+             * with the fill value if fill values are to be written.
+             */
+            if (H5F_addr_defined(chunk_list[i].chunk_current.offset)) {
+                /* Set first read buffer */
+                if (!base_read_buf)
+                    base_read_buf = chunk_list[i].buf;
+
+                /* Set chunk's new length for eventual filter pipeline calls */
+                if (chunk_list[i].skip_filter_pline)
+                    chunk_list[i].chunk_new.length = file_chunk_size;
+                else
+                    chunk_list[i].chunk_new.length = chunk_list[i].chunk_current.length;
+            }
+            else {
+                chunk_list[i].need_read = FALSE; /* also makes the unfilter step below skip this entry */
+
+                /* Set chunk's new length for eventual filter pipeline calls */
+                chunk_list[i].chunk_new.length = file_chunk_size;
+
+                if (should_fill) {
+                    /* Initialize fill value buffer if not already initialized */
+                    if (!fb_info_init) {
+                        hsize_t chunk_dims[H5S_MAX_RANK];
+
+                        HDassert(io_info->dset->shared->ndims ==
+                                 io_info->dset->shared->layout.u.chunk.ndims - 1);
+                        for (size_t j = 0; j < io_info->dset->shared->layout.u.chunk.ndims - 1; j++)
+                            chunk_dims[j] = (hsize_t)io_info->dset->shared->layout.u.chunk.dim[j];
+
+                        /* Get a dataspace for filling chunk memory buffers */
+                        if (NULL == (fill_space = H5S_create_simple(
+                                         io_info->dset->shared->layout.u.chunk.ndims - 1, chunk_dims, NULL)))
+                            HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL,
+                                        "unable to create chunk fill dataspace")
+
+                        /* Initialize fill value buffer */
+                        if (H5D__fill_init(&fb_info, NULL, (H5MM_allocate_t)H5D__chunk_mem_alloc,
+                                           (void *)&io_info->dset->shared->dcpl_cache.pline,
+                                           (H5MM_free_t)H5D__chunk_mem_free,
+                                           (void *)&io_info->dset->shared->dcpl_cache.pline,
+                                           &io_info->dset->shared->dcpl_cache.fill,
+                                           io_info->dset->shared->type, io_info->dset->shared->type_id, 0,
+                                           file_chunk_size) < 0)
+                            HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't initialize fill value buffer")
+
+                        fb_info_init = TRUE;
+                    }
+
+                    /* Write fill value to memory buffer */
+                    HDassert(fb_info.fill_buf);
+                    if (H5D__fill(fb_info.fill_buf, io_info->dset->shared->type, chunk_list[i].buf,
+                                  type_info->mem_type, fill_space) < 0)
+                        HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL,
+                                    "couldn't fill chunk buffer with fill value")
+                }
+            }
+        }
+        else
+            chunk_list[i].chunk_new.length = file_chunk_size;
+    }
+
+    /*
+     * If dataset is incrementally allocated and hasn't been written to
+     * yet, the chunk index should be empty. In this case, a collective
+     * read of chunks is essentially a no-op, so avoid it here.
+     */
+    index_empty = FALSE;
+    if (io_info->dset->shared->dcpl_cache.fill.alloc_time == H5D_ALLOC_TIME_INCR)
+        if (H5D__chunk_index_empty(io_info->dset, &index_empty) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "couldn't determine if chunk index is empty")
+
+    if (!index_empty) {
+        /*
+         * Setup for I/O operation
+         */
+
+        /* Initialize temporary I/O info */
+        coll_io_info         = *io_info;
+        coll_io_info.op_type = H5D_IO_OP_READ; /* the copy is used for the read step only */
+
+        /* Override the read buffer to point to the address of the first
+         * chunk data buffer being read into
+         */
+        if (base_read_buf)
+            coll_io_info.u.rbuf = base_read_buf;
+
+        /* Read all chunks that need to be read from the file */
+        if (H5D__mpio_collective_filtered_chunk_common_io(chunk_list, chunk_list_num_entries, &coll_io_info,
+                                                          type_info, mpi_size) < 0)
+            HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "couldn't finish collective filtered chunk read")
+    }
+
+    /*
+     * Now that all owned chunks have been read, update the chunks
+     * with modification data from the owning rank and other ranks.
+     */
+
+    /* Process all chunks with data from the owning rank first */
+    for (i = 0; i < chunk_list_num_entries; i++) {
+        HDassert(mpi_rank == chunk_list[i].new_owner);
+
+        chunk_info = chunk_list[i].chunk_info;
+
+        /*
+         * If this chunk wasn't being fully overwritten, we read it from
+         * the file, so we need to unfilter it
+         */
+        if (chunk_list[i].need_read && !chunk_list[i].skip_filter_pline) {
+            if (H5Z_pipeline(&io_info->dset->shared->dcpl_cache.pline, H5Z_FLAG_REVERSE,
+                             &(chunk_list[i].index_info.filter_mask), err_detect, filter_cb,
+                             (size_t *)&chunk_list[i].chunk_new.length, &chunk_list[i].chunk_buf_size,
+                             &chunk_list[i].buf) < 0)
+                HGOTO_ERROR(H5E_DATASET, H5E_CANTFILTER, FAIL, "couldn't unfilter chunk for modifying")
+        }
+
+        iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->mspace);
+
+        if (H5D_select_io_mem(chunk_list[i].buf, chunk_info->fspace, io_info->u.wbuf, chunk_info->mspace,
+                              type_info->dst_type_size, (size_t)iter_nelmts) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "couldn't copy chunk data to write buffer")
+    }
+
+    /* Allocate iterator for memory selection */
+    if (NULL == (sel_iter = H5FL_MALLOC(H5S_sel_iter_t)))
+        HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate memory iterator")
+
+    /* Now process all received chunk message buffers */
+    for (i = 0; i < (size_t)chunk_msg_bufs_len; i++) {
+        H5D_filtered_collective_io_info_t *chunk_entry = NULL;
+        const unsigned char *              msg_ptr     = chunk_msg_bufs[i];
+        hsize_t                            chunk_idx;
+
+        if (msg_ptr) {
+            /* Retrieve the chunk's index value */
+            HDmemcpy(&chunk_idx, msg_ptr, sizeof(hsize_t));
+            msg_ptr += sizeof(hsize_t);
+
+            /* Find the chunk entry according to its chunk index */
+            HASH_FIND(hh, chunk_hash_table, &chunk_idx, sizeof(hsize_t), chunk_entry);
+            HDassert(chunk_entry);
+            HDassert(mpi_rank == chunk_entry->new_owner);
+
+            /*
+             * Only process the chunk if its data buffer is allocated.
+             * In the case of multi-chunk I/O, we're only working on
+             * a chunk at a time, so we need to skip over messages
+             * that aren't for the chunk we're currently working on.
+             */
+            if (!chunk_entry->buf)
+                continue;
+            else {
+                /* Decode the chunk file dataspace from the message */
+                if (NULL == (dataspace = H5S_decode(&msg_ptr)))
+                    HGOTO_ERROR(H5E_DATASET, H5E_CANTDECODE, FAIL, "unable to decode dataspace")
+
+                if (H5S_select_iter_init(sel_iter, dataspace, type_info->dst_type_size,
+                                         H5S_SEL_ITER_SHARE_WITH_DATASPACE) < 0)
+                    HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL,
+                                "unable to initialize memory selection information")
+                sel_iter_init = TRUE;
+
+                iter_nelmts = H5S_GET_SELECT_NPOINTS(dataspace);
+
+                /* Update the chunk data with the received modification data */
+                if (H5D__scatter_mem(msg_ptr, sel_iter, (size_t)iter_nelmts, chunk_entry->buf) < 0)
+                    HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "couldn't scatter to write buffer")
+
+                if (H5S_SELECT_ITER_RELEASE(sel_iter) < 0)
+                    HGOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator")
+                sel_iter_init = FALSE;
+
+                if (dataspace) {
+                    if (H5S_close(dataspace) < 0)
+                        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTFREE, FAIL, "can't close dataspace")
+                    dataspace = NULL;
+                }
+
+                H5MM_free(chunk_msg_bufs[i]); /* message has been applied; drop its buffer */
+                chunk_msg_bufs[i] = NULL;
+            }
+        }
+    }
+
+    /* Finally, filter all the chunks */
+    for (i = 0; i < chunk_list_num_entries; i++) {
+        if (!chunk_list[i].skip_filter_pline) {
+            if (H5Z_pipeline(&io_info->dset->shared->dcpl_cache.pline, 0,
+                             &(chunk_list[i].index_info.filter_mask), err_detect, filter_cb,
+                             (size_t *)&chunk_list[i].chunk_new.length, &chunk_list[i].chunk_buf_size,
+                             &chunk_list[i].buf) < 0)
+                HGOTO_ERROR(H5E_PLINE, H5E_CANTFILTER, FAIL, "output pipeline failed")
+        }
+
+#if H5_SIZEOF_SIZE_T > 4
+        /* Check for the chunk expanding too much to encode in a 32-bit value */
+        if (chunk_list[i].chunk_new.length > ((size_t)0xffffffff))
+            HGOTO_ERROR(H5E_DATASET, H5E_BADRANGE, FAIL, "chunk too large for 32-bit length")
+#endif
+    }
+
+done:
+    if (sel_iter) {
+        if (sel_iter_init && H5S_SELECT_ITER_RELEASE(sel_iter) < 0)
+            HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator")
+        sel_iter = H5FL_FREE(H5S_sel_iter_t, sel_iter);
+    }
+    if (dataspace && (H5S_close(dataspace) < 0))
+        HDONE_ERROR(H5E_DATASPACE, H5E_CANTFREE, FAIL, "can't close dataspace")
+    if (fill_space && (H5S_close(fill_space) < 0))
+        HDONE_ERROR(H5E_DATASET, H5E_CLOSEERROR, FAIL, "can't close fill space")
+
+    /* Release the fill buffer info, if it's been initialized */
+    if (fb_info_init && H5D__fill_term(&fb_info) < 0)
+        HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "Can't release fill buffer info")
+
+    /* On failure, free the chunk data buffers; on success they stay set in chunk_list[i].buf */
+    if (ret_value < 0) {
+        for (i = 0; i < chunk_list_num_entries; i++) {
+            if (chunk_list[i].buf) {
+                H5MM_free(chunk_list[i].buf);
+                chunk_list[i].buf = NULL;
+            }
+        }
+    }
+
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TIME_STOP(mpi_rank);
+    H5D_MPIO_TRACE_EXIT(mpi_rank);
+#endif
+
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5D__mpio_collective_filtered_chunk_update() */
+
+/*-------------------------------------------------------------------------
+ * Function:    H5D__mpio_collective_filtered_chunk_reallocate
+ *
+ * Purpose:     When performing a parallel write on a chunked dataset with
+ *              filters applied, all ranks must eventually get together and
+ *              perform a collective reallocation of space in the file for
+ *              all chunks that were modified on all ranks. This routine is
+ *              responsible for coordinating that process.
+ *
+ * Return:      Non-negative on success/Negative on failure
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5D__mpio_collective_filtered_chunk_reallocate(H5D_filtered_collective_io_info_t *chunk_list,
+                                               size_t chunk_list_num_entries, size_t *num_chunks_assigned_map,
+                                               H5D_io_info_t *io_info, H5D_chk_idx_info_t *idx_info,
+                                               int mpi_rank, int mpi_size)
+{
+    H5D_chunk_alloc_info_t *collective_list = NULL;
+    MPI_Datatype            send_type;
+    MPI_Datatype            recv_type;
+    hbool_t                 send_type_derived          = FALSE;
+    hbool_t                 recv_type_derived          = FALSE;
+    hbool_t                 need_sort                  = FALSE;
+    size_t                  collective_num_entries     = 0;
+    size_t                  num_local_chunks_processed = 0;
+    size_t                  i;
+    void *                  gathered_array     = NULL;
+    int *                   counts_disps_array = NULL;
+    int *                   counts_ptr         = NULL;
+    int *                   displacements_ptr  = NULL;
+    int                     mpi_code;
+    herr_t                  ret_value = SUCCEED;
+
+    FUNC_ENTER_STATIC
+
+    HDassert(chunk_list || 0 == chunk_list_num_entries);
+    HDassert(io_info);
+    HDassert(idx_info);
+    HDassert(idx_info->storage->idx_type != H5D_CHUNK_IDX_NONE);
+
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TRACE_ENTER(mpi_rank);
+    H5D_MPIO_TIME_START(mpi_rank, "Reallocation of chunk file space");
+#endif
+
+    /*
+     * Make sure it's safe to cast this rank's number
+     * of chunks to be sent into an int for MPI
+     */
+    H5_CHECK_OVERFLOW(chunk_list_num_entries, size_t, int);
+
+    /* Create derived datatypes for the chunk file space info needed */
+    if (H5D__mpio_get_chunk_alloc_info_types(&recv_type, &recv_type_derived, &send_type, &send_type_derived) <
+        0)
+        HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL,
+                    "can't create derived datatypes for chunk file space info")
+
+    /*
+     * Gather the new chunk sizes to all ranks for a collective reallocation
+     * of the chunks in the file.
+     */
+    if (num_chunks_assigned_map) {
+        /*
+         * If a mapping between rank value -> number of assigned chunks has
+         * been provided (usually during linked-chunk I/O), we can use this
+         * to optimize MPI overhead a bit since MPI ranks won't need to
+         * first inform each other about how many chunks they're contributing.
+         */
+        if (NULL == (counts_disps_array = H5MM_malloc(2 * (size_t)mpi_size * sizeof(*counts_disps_array)))) {
+            /* Push an error, but still participate in collective gather operation */
+            HDONE_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+                        "couldn't allocate receive counts and displacements array")
+        }
+        else {
+            /* Set the receive counts from the assigned chunks map */
+            counts_ptr = counts_disps_array;
+
+            for (i = 0; i < (size_t)mpi_size; i++)
+                H5_CHECKED_ASSIGN(counts_ptr[i], int, num_chunks_assigned_map[i], size_t);
+
+            /* Set the displacements into the receive buffer for the gather operation */
+            displacements_ptr = &counts_disps_array[mpi_size];
+
+            *displacements_ptr = 0;
+            for (i = 1; i < (size_t)mpi_size; i++)
+                displacements_ptr[i] = displacements_ptr[i - 1] + counts_ptr[i - 1];
+        }
+
+        /* Perform gather operation */
+        if (H5_mpio_gatherv_alloc(chunk_list, (int)chunk_list_num_entries, send_type, counts_ptr,
+                                  displacements_ptr, recv_type, TRUE, 0, io_info->comm, mpi_rank, mpi_size,
+                                  &gathered_array, &collective_num_entries) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL, "can't gather chunk file space info to/from ranks")
+    }
+    else {
+        /*
+         * If no mapping between rank value -> number of assigned chunks has
+         * been provided (usually during multi-chunk I/O), all MPI ranks will
+         * need to first inform other ranks about how many chunks they're
+         * contributing before performing the actual gather operation. Use
+         * the 'simple' MPI_Allgatherv wrapper for this.
+         */
+        if (H5_mpio_gatherv_alloc_simple(chunk_list, (int)chunk_list_num_entries, send_type, recv_type, TRUE,
+                                         0, io_info->comm, mpi_rank, mpi_size, &gathered_array,
+                                         &collective_num_entries) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL, "can't gather chunk file space info to/from ranks")
+    }
+
+    /* Collectively re-allocate the modified chunks (from each rank) in the file */
+    collective_list = (H5D_chunk_alloc_info_t *)gathered_array;
+    for (i = 0, num_local_chunks_processed = 0; i < collective_num_entries; i++) {
+        H5D_chunk_alloc_info_t *coll_entry = &collective_list[i];
+        hbool_t                 need_insert;
+        hbool_t                 update_local_chunk;
+
+        if (H5D__chunk_file_alloc(idx_info, &coll_entry->chunk_current, &coll_entry->chunk_new, &need_insert,
+                                  NULL) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "unable to allocate chunk")
+
+        /*
+         * If we just re-allocated a chunk that is local to this
+         * rank, make sure to update the chunk entry in the local
+         * chunk list
+         */
+        update_local_chunk =
+            (num_local_chunks_processed < chunk_list_num_entries) &&
+            (coll_entry->chunk_idx == chunk_list[num_local_chunks_processed].index_info.chunk_idx);
+
+        if (update_local_chunk) {
+            H5D_filtered_collective_io_info_t *local_chunk;
+
+            local_chunk = &chunk_list[num_local_chunks_processed];
+
+            /* Sanity check that this chunk is actually local */
+            HDassert(mpi_rank == local_chunk->orig_owner);
+            HDassert(mpi_rank == local_chunk->new_owner);
+
+            local_chunk->chunk_new              = coll_entry->chunk_new;
+            local_chunk->index_info.need_insert = need_insert;
+
+            /*
+             * Since chunk reallocation can move chunks around, check if
+             * the local chunk list is still in ascending order of offset
+             * in the file
+             */
+            if (num_local_chunks_processed) {
+                haddr_t curr_chunk_offset = local_chunk->chunk_new.offset;
+                haddr_t prev_chunk_offset = chunk_list[num_local_chunks_processed - 1].chunk_new.offset;
+
+                HDassert(H5F_addr_defined(prev_chunk_offset) && H5F_addr_defined(curr_chunk_offset));
+                if (curr_chunk_offset < prev_chunk_offset)
+                    need_sort = TRUE;
+            }
+
+            num_local_chunks_processed++;
+        }
+    }
+
+    HDassert(chunk_list_num_entries == num_local_chunks_processed);
+
+    /*
+     * Ensure this rank's local chunk list is sorted in
+     * ascending order of offset in the file
+     */
+    if (need_sort)
+        HDqsort(chunk_list, chunk_list_num_entries, sizeof(H5D_filtered_collective_io_info_t),
+                H5D__cmp_filtered_collective_io_info_entry);
+
+done:
+    H5MM_free(gathered_array);
+    H5MM_free(counts_disps_array);
+
+    if (send_type_derived) {
+        if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&send_type)))
+            HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+    }
+    if (recv_type_derived) {
+        if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&recv_type)))
+            HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+    }
+
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TIME_STOP(mpi_rank);
+    H5D_MPIO_TRACE_EXIT(mpi_rank);
+#endif
+
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* H5D__mpio_collective_filtered_chunk_reallocate() */
+
+/*-------------------------------------------------------------------------
+ * Function:    H5D__mpio_collective_filtered_chunk_reinsert
+ *
+ * Purpose:     During a parallel write to a filtered, chunked dataset,
+ *              gathers each rank's chunk re-insertion info to all ranks
+ *              and has every rank re-insert into the chunk index each
+ *              gathered chunk flagged `need_insert`. Succeeds without
+ *              doing anything if the index has no `insert` callback.
+ *
+ * Return:      Non-negative on success/Negative on failure
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5D__mpio_collective_filtered_chunk_reinsert(H5D_filtered_collective_io_info_t *chunk_list,
+                                             size_t chunk_list_num_entries, size_t *num_chunks_assigned_map,
+                                             H5D_io_info_t *io_info, H5D_chk_idx_info_t *idx_info,
+                                             int mpi_rank, int mpi_size)
+{
+    H5D_chunk_ud_t chunk_ud;
+    MPI_Datatype   send_type;
+    MPI_Datatype   recv_type;
+    hbool_t        send_type_derived = FALSE;
+    hbool_t        recv_type_derived = FALSE;
+    hsize_t        scaled_coords[H5O_LAYOUT_NDIMS]; /* Scaled coordinates of the chunk being inserted */
+    size_t         collective_num_entries = 0; /* Number of entries in gathered_array */
+    size_t         i;
+    void *         gathered_array     = NULL; /* Gathered array of H5D_chunk_insert_info_t entries */
+    int *          counts_disps_array = NULL; /* Holds receive counts, then displacements */
+    int *          counts_ptr         = NULL; /* First mpi_size ints of counts_disps_array */
+    int *          displacements_ptr  = NULL; /* Second mpi_size ints of counts_disps_array */
+    int            mpi_code;
+    herr_t         ret_value = SUCCEED;
+
+    FUNC_ENTER_STATIC
+
+    HDassert(chunk_list || 0 == chunk_list_num_entries);
+    HDassert(io_info);
+    HDassert(idx_info);
+
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TRACE_ENTER(mpi_rank);
+    H5D_MPIO_TIME_START(mpi_rank, "Reinsertion of modified chunks into chunk index");
+#endif
+
+    /* Only re-insert chunks if index has an insert method */
+    if (!idx_info->storage->ops->insert)
+        HGOTO_DONE(SUCCEED);
+
+    /*
+     * Make sure it's safe to cast this rank's number
+     * of chunks to be sent into an int for MPI
+     */
+    H5_CHECK_OVERFLOW(chunk_list_num_entries, size_t, int);
+
+    /* Create derived datatypes for the chunk re-insertion info needed */
+    if (H5D__mpio_get_chunk_insert_info_types(&recv_type, &recv_type_derived, &send_type,
+                                              &send_type_derived) < 0)
+        HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL,
+                    "can't create derived datatypes for chunk re-insertion info")
+
+    /*
+     * Gather information to all ranks for a collective re-insertion
+     * of the modified chunks into the chunk index
+     */
+    if (num_chunks_assigned_map) {
+        /*
+         * If a mapping between rank value -> number of assigned chunks has
+         * been provided (usually during linked-chunk I/O), we can use this
+         * to optimize MPI overhead a bit since MPI ranks won't need to
+         * first inform each other about how many chunks they're contributing.
+         */
+        if (NULL == (counts_disps_array = H5MM_malloc(2 * (size_t)mpi_size * sizeof(*counts_disps_array)))) {
+            /* Push an error, but still join the collective gather (counts/displacements stay NULL) */
+            HDONE_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+                        "couldn't allocate receive counts and displacements array")
+        }
+        else {
+            /* Set the receive counts from the assigned chunks map */
+            counts_ptr = counts_disps_array;
+
+            for (i = 0; i < (size_t)mpi_size; i++)
+                H5_CHECKED_ASSIGN(counts_ptr[i], int, num_chunks_assigned_map[i], size_t);
+
+            /* Set the displacements into the receive buffer for the gather operation */
+            displacements_ptr = &counts_disps_array[mpi_size];
+
+            *displacements_ptr = 0;
+            for (i = 1; i < (size_t)mpi_size; i++)
+                displacements_ptr[i] = displacements_ptr[i - 1] + counts_ptr[i - 1];
+        }
+
+        /* Perform gather operation */
+        if (H5_mpio_gatherv_alloc(chunk_list, (int)chunk_list_num_entries, send_type, counts_ptr,
+                                  displacements_ptr, recv_type, TRUE, 0, io_info->comm, mpi_rank, mpi_size,
+                                  &gathered_array, &collective_num_entries) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL,
+                        "can't gather chunk index re-insertion info to/from ranks")
+    }
+    else {
+        /*
+         * If no mapping between rank value -> number of assigned chunks has
+         * been provided (usually during multi-chunk I/O), all MPI ranks will
+         * need to first inform other ranks about how many chunks they're
+         * contributing before performing the actual gather operation. Use
+         * the 'simple' MPI_Allgatherv wrapper for this.
+         */
+        if (H5_mpio_gatherv_alloc_simple(chunk_list, (int)chunk_list_num_entries, send_type, recv_type, TRUE,
+                                         0, io_info->comm, mpi_rank, mpi_size, &gathered_array,
+                                         &collective_num_entries) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL,
+                        "can't gather chunk index re-insertion info to/from ranks")
+    }
+
+    /* Initialize static chunk udata fields from chunk index info */
+    H5D_MPIO_INIT_CHUNK_UD_INFO(chunk_ud, idx_info);
+
+    for (i = 0; i < collective_num_entries; i++) {
+        H5D_chunk_insert_info_t *coll_entry = &((H5D_chunk_insert_info_t *)gathered_array)[i];
+
+        /*
+         * We only need to reinsert this chunk if we had to actually
+         * allocate or reallocate space in the file for it
+         */
+        if (!coll_entry->index_info.need_insert)
+            continue;
+
+        chunk_ud.chunk_block   = coll_entry->chunk_block;
+        chunk_ud.chunk_idx     = coll_entry->index_info.chunk_idx;
+        chunk_ud.filter_mask   = coll_entry->index_info.filter_mask;
+        chunk_ud.common.scaled = scaled_coords;
+
+        /* Calculate scaled coordinates for the chunk */
+        if (idx_info->layout->idx_type == H5D_CHUNK_IDX_EARRAY && idx_info->layout->u.earray.unlim_dim > 0) {
+            /*
+             * Extensible arrays where the unlimited dimension is not
+             * the slowest-changing dimension "swizzle" the coordinates
+             * to move the unlimited dimension value to offset 0. Therefore,
+             * we use the "swizzled" down chunks to calculate the "swizzled"
+             * scaled coordinates and then we undo the "swizzle" operation.
+             *
+             * TODO: In the future, this is something that should be handled
+             *       by the particular chunk index rather than manually
+             *       here. Likely, the chunk index ops should get a new
+             *       callback that accepts a chunk index and provides the
+             *       caller with the scaled coordinates for that chunk.
+             */
+            H5VM_array_calc_pre(chunk_ud.chunk_idx, io_info->dset->shared->ndims,
+                                idx_info->layout->u.earray.swizzled_down_chunks, scaled_coords);
+
+            H5VM_unswizzle_coords(hsize_t, scaled_coords, idx_info->layout->u.earray.unlim_dim);
+        }
+        else {
+            H5VM_array_calc_pre(chunk_ud.chunk_idx, io_info->dset->shared->ndims,
+                                io_info->dset->shared->layout.u.chunk.down_chunks, scaled_coords);
+        }
+
+        scaled_coords[io_info->dset->shared->ndims] = 0; /* Zero the slot just past the ndims coordinates */
+
+#ifndef NDEBUG
+        /*
+         * If a matching local chunk entry is found, the
+         * `chunk_info` structure (which contains the chunk's
+         * pre-computed scaled coordinates) will be valid
+         * for this rank. Compare those coordinates against
+         * the calculated coordinates above to make sure
+         * they match.
+         */
+        for (size_t dbg_idx = 0; dbg_idx < chunk_list_num_entries; dbg_idx++) {
+            if (coll_entry->index_info.chunk_idx == chunk_list[dbg_idx].index_info.chunk_idx) {
+                hbool_t coords_match = !HDmemcmp(scaled_coords, chunk_list[dbg_idx].chunk_info->scaled,
+                                                 io_info->dset->shared->ndims * sizeof(hsize_t));
+
+                HDassert(coords_match && "Calculated scaled coordinates for chunk didn't match "
+                                         "chunk's actual scaled coordinates!");
+                break;
+            }
+        }
+#endif
+
+        if ((idx_info->storage->ops->insert)(idx_info, &chunk_ud, io_info->dset) < 0)
+            HGOTO_ERROR(H5E_DATASET, H5E_CANTINSERT, FAIL, "unable to insert chunk address into index")
+    }
+
+done:
+    H5MM_free(gathered_array);
+    H5MM_free(counts_disps_array);
+
+    if (send_type_derived) {
+        if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&send_type)))
+            HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+    }
+    if (recv_type_derived) {
+        if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&recv_type)))
+            HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+    }
+
+#ifdef H5Dmpio_DEBUG
+    H5D_MPIO_TIME_STOP(mpi_rank);
+    H5D_MPIO_TRACE_EXIT(mpi_rank);
+#endif
+
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5D__mpio_collective_filtered_chunk_reinsert() */
+
+/*-------------------------------------------------------------------------
+ * Function:    H5D__mpio_get_chunk_redistribute_info_types
  *
- * Return:      Non-negative on success/Negative on failure
+ * Purpose:     Constructs MPI derived datatypes for communicating the
+ *              info from a H5D_filtered_collective_io_info_t structure
+ *              that is necessary for redistributing shared chunks during a
+ *              collective write of filtered chunks.
+ *
+ *              The datatype returned through `contig_type` has an extent
+ *              equal to the size of an H5D_chunk_redistribute_info_t
+ *              structure and is suitable for communicating that structure
+ *              type.
  *
- * Programmer:  Jordan Henderson
- *              Monday, May 1, 2017
+ *              The datatype returned through `resized_type` has an extent
+ *              equal to the size of an H5D_filtered_collective_io_info_t
+ *              structure. This makes it suitable for sending an array of
+ *              those structures, while extracting out just the info
+ *              necessary for the chunk redistribution operation during
+ *              communication.
+ *
+ * Return:      Non-negative on success/Negative on failure
  *
  *-------------------------------------------------------------------------
  */
 static herr_t
-H5D__chunk_redistribute_shared_chunks(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
-                                      const H5D_chunk_map_t *            fm,
-                                      H5D_filtered_collective_io_info_t *local_chunk_array,
-                                      size_t *                           local_chunk_array_num_entries)
+H5D__mpio_get_chunk_redistribute_info_types(MPI_Datatype *contig_type, hbool_t *contig_type_derived,
+                                            MPI_Datatype *resized_type, hbool_t *resized_type_derived)
 {
-    H5D_filtered_collective_io_info_t *shared_chunks_info_array =
-        NULL;                        /* The list of all chunks selected in the operation by all processes */
-    H5S_sel_iter_t *mem_iter = NULL; /* Memory iterator for H5D__gather_mem */
-    unsigned char **mod_data =
-        NULL; /* Array of chunk modification data buffers sent by a process to new chunk owners */
-    MPI_Request *send_requests = NULL; /* Array of MPI_Isend chunk modification data send requests */
-    MPI_Status * send_statuses = NULL; /* Array of MPI_Isend chunk modification send statuses */
-    hbool_t      mem_iter_init = FALSE;
-    size_t       shared_chunks_info_array_num_entries = 0;
-    size_t       num_send_requests                    = 0;
-    size_t *     num_assigned_chunks_array            = NULL;
-    size_t       i, last_assigned_idx;
-    int *        send_counts        = NULL;
-    int *        send_displacements = NULL;
-    int          scatter_recvcount_int;
-    int          mpi_rank, mpi_size, mpi_code;
+    MPI_Datatype struct_type              = MPI_DATATYPE_NULL; /* Extraction type before resizing */
+    hbool_t      struct_type_derived      = FALSE;
+    MPI_Datatype chunk_block_type         = MPI_DATATYPE_NULL; /* MPI type for H5F_block_t fields */
+    hbool_t      chunk_block_type_derived = FALSE;
+    MPI_Datatype types[5];
+    MPI_Aint     displacements[5];
+    int          block_lengths[5];
+    int          field_count;
+    int          mpi_code;
     herr_t       ret_value = SUCCEED;
 
     FUNC_ENTER_STATIC
 
-    HDassert(io_info);
-    HDassert(type_info);
-    HDassert(fm);
-    HDassert(local_chunk_array_num_entries);
+    HDassert(contig_type);
+    HDassert(contig_type_derived);
+    HDassert(resized_type);
+    HDassert(resized_type_derived);
 
-    if ((mpi_rank = H5F_mpi_get_rank(io_info->dset->oloc.file)) < 0)
-        HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi rank")
-    if ((mpi_size = H5F_mpi_get_size(io_info->dset->oloc.file)) < 0)
-        HGOTO_ERROR(H5E_IO, H5E_MPI, FAIL, "unable to obtain mpi size")
+    *contig_type_derived  = FALSE;
+    *resized_type_derived = FALSE;
 
-    /* Set to latest format for encoding dataspace */
-    H5CX_set_libver_bounds(NULL);
+    /* Create struct type for the inner H5F_block_t structure */
+    if (H5F_mpi_get_file_block_type(FALSE, &chunk_block_type, &chunk_block_type_derived) < 0)
+        HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't create derived type for chunk file description")
 
-    if (*local_chunk_array_num_entries)
-        if (NULL == (send_requests =
-                         (MPI_Request *)H5MM_malloc(*local_chunk_array_num_entries * sizeof(MPI_Request))))
-            HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate send requests buffer")
+    field_count = 5;
+    HDassert(field_count == (sizeof(types) / sizeof(MPI_Datatype))); /* Must match array sizes above */
 
-    if (NULL == (mem_iter = (H5S_sel_iter_t *)H5MM_malloc(sizeof(H5S_sel_iter_t))))
-        HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate memory iterator")
+    /*
+     * Create structure type to pack chunk H5F_block_t structure
+     * next to chunk_idx, orig_owner, new_owner and num_writers
+     * fields
+     */
+    block_lengths[0] = 1;
+    block_lengths[1] = 1;
+    block_lengths[2] = 1;
+    block_lengths[3] = 1;
+    block_lengths[4] = 1;
+    displacements[0] = offsetof(H5D_chunk_redistribute_info_t, chunk_block);
+    displacements[1] = offsetof(H5D_chunk_redistribute_info_t, chunk_idx);
+    displacements[2] = offsetof(H5D_chunk_redistribute_info_t, orig_owner);
+    displacements[3] = offsetof(H5D_chunk_redistribute_info_t, new_owner);
+    displacements[4] = offsetof(H5D_chunk_redistribute_info_t, num_writers);
+    types[0]         = chunk_block_type;
+    types[1]         = HSIZE_AS_MPI_TYPE;
+    types[2]         = MPI_INT;
+    types[3]         = MPI_INT;
+    types[4]         = MPI_INT;
+    if (MPI_SUCCESS !=
+        (mpi_code = MPI_Type_create_struct(field_count, block_lengths, displacements, types, contig_type)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)
+    *contig_type_derived = TRUE;
+
+    if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(contig_type)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
 
-    /* Gather every rank's list of chunks to rank 0 to allow it to perform the redistribution operation. After
-     * this call, the gathered list will initially be sorted in increasing order of chunk offset in the file.
+    /* Create struct type to extract the chunk_current, chunk_idx, orig_owner,
+     * new_owner and num_writers fields from a H5D_filtered_collective_io_info_t
+     * structure
      */
-    if (H5D__mpio_array_gatherv(local_chunk_array, *local_chunk_array_num_entries,
-                                sizeof(H5D_filtered_collective_io_info_t), (void **)&shared_chunks_info_array,
-                                &shared_chunks_info_array_num_entries, false, 0, io_info->comm,
-                                H5D__cmp_filtered_collective_io_info_entry) < 0)
-        HGOTO_ERROR(H5E_DATASET, H5E_CANTGATHER, FAIL, "couldn't gather array")
+    block_lengths[0] = 1;
+    block_lengths[1] = 1;
+    block_lengths[2] = 1;
+    block_lengths[3] = 1;
+    block_lengths[4] = 1;
+    displacements[0] = offsetof(H5D_filtered_collective_io_info_t, chunk_current);
+    displacements[1] = offsetof(H5D_filtered_collective_io_info_t, index_info.chunk_idx);
+    displacements[2] = offsetof(H5D_filtered_collective_io_info_t, orig_owner);
+    displacements[3] = offsetof(H5D_filtered_collective_io_info_t, new_owner);
+    displacements[4] = offsetof(H5D_filtered_collective_io_info_t, num_writers);
+    types[0]         = chunk_block_type;
+    types[1]         = HSIZE_AS_MPI_TYPE;
+    types[2]         = MPI_INT;
+    types[3]         = MPI_INT;
+    types[4]         = MPI_INT;
+    if (MPI_SUCCESS !=
+        (mpi_code = MPI_Type_create_struct(field_count, block_lengths, displacements, types, &struct_type)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)
+    struct_type_derived = TRUE;
 
-    /* Rank 0 redistributes any shared chunks to new owners as necessary */
-    if (mpi_rank == 0) {
-        if (NULL == (send_counts = (int *)H5MM_calloc((size_t)mpi_size * sizeof(int))))
-            HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "unable to allocate send counts buffer")
+    if (MPI_SUCCESS != (mpi_code = MPI_Type_create_resized(
+                            struct_type, 0, sizeof(H5D_filtered_collective_io_info_t), resized_type)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_resized failed", mpi_code)
+    *resized_type_derived = TRUE;
 
-        if (NULL == (send_displacements = (int *)H5MM_malloc((size_t)mpi_size * sizeof(int))))
-            HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "unable to allocate send displacements buffer")
+    if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(resized_type)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
 
-        if (NULL == (num_assigned_chunks_array = (size_t *)H5MM_calloc((size_t)mpi_size * sizeof(size_t))))
-            HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL,
-                        "unable to allocate number of assigned chunks array")
+done:
+    if (struct_type_derived) {
+        if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&struct_type)))
+            HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+    }
+    if (chunk_block_type_derived) {
+        if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&chunk_block_type)))
+            HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+    }
 
-        for (i = 0; i < shared_chunks_info_array_num_entries;) {
-            H5D_filtered_collective_io_info_t *chunk_entry;
-            haddr_t last_seen_addr  = shared_chunks_info_array[i].chunk_states.chunk_current.offset;
-            size_t  set_begin_index = i;
-            size_t  num_writers     = 0;
-            int     new_chunk_owner = shared_chunks_info_array[i].owners.original_owner;
+    if (ret_value < 0) { /* On failure, free any types already created for the caller */
+        if (*resized_type_derived) {
+            if (MPI_SUCCESS != (mpi_code = MPI_Type_free(resized_type)))
+                HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+            *resized_type_derived = FALSE;
+        }
+        if (*contig_type_derived) {
+            if (MPI_SUCCESS != (mpi_code = MPI_Type_free(contig_type)))
+                HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+            *contig_type_derived = FALSE;
+        }
+    }
 
-            /* Process each set of duplicate entries caused by another process writing to the same chunk */
-            do {
-                chunk_entry = &shared_chunks_info_array[i];
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5D__mpio_get_chunk_redistribute_info_types() */
 
-                send_counts[chunk_entry->owners.original_owner] += (int)sizeof(*chunk_entry);
+/*-------------------------------------------------------------------------
+ * Function:    H5D__mpio_get_chunk_alloc_info_types
+ *
+ * Purpose:     Constructs MPI derived datatypes for communicating the info
+ *              from a H5D_filtered_collective_io_info_t structure that is
+ *              necessary for re-allocating file space during a collective
+ *              write of filtered chunks.
+ *
+ *              The datatype returned through `contig_type` has an extent
+ *              equal to the size of an H5D_chunk_alloc_info_t structure
+ *              and is suitable for communicating that structure type.
+ *
+ *              The datatype returned through `resized_type` has an extent
+ *              equal to the size of an H5D_filtered_collective_io_info_t
+ *              structure. This makes it suitable for sending an array of
+ *              those structures, while extracting out just the info
+ *              necessary for the chunk file space reallocation operation
+ *              during communication.
+ *
+ * Return:      Non-negative on success/Negative on failure
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5D__mpio_get_chunk_alloc_info_types(MPI_Datatype *contig_type, hbool_t *contig_type_derived,
+                                     MPI_Datatype *resized_type, hbool_t *resized_type_derived)
+{
+    MPI_Datatype struct_type              = MPI_DATATYPE_NULL;
+    hbool_t      struct_type_derived      = FALSE;
+    MPI_Datatype chunk_block_type         = MPI_DATATYPE_NULL;
+    hbool_t      chunk_block_type_derived = FALSE;
+    MPI_Datatype types[3];
+    MPI_Aint     displacements[3];
+    int          block_lengths[3];
+    int          field_count;
+    int          mpi_code;
+    herr_t       ret_value = SUCCEED;
 
-                /* The new owner of the chunk is determined by the process
-                 * writing to the chunk which currently has the least amount
-                 * of chunks assigned to it
-                 */
-                if (num_assigned_chunks_array[chunk_entry->owners.original_owner] <
-                    num_assigned_chunks_array[new_chunk_owner])
-                    new_chunk_owner = chunk_entry->owners.original_owner;
+    FUNC_ENTER_STATIC
 
-                num_writers++;
-            } while (++i < shared_chunks_info_array_num_entries &&
-                     shared_chunks_info_array[i].chunk_states.chunk_current.offset == last_seen_addr);
+    HDassert(contig_type);
+    HDassert(contig_type_derived);
+    HDassert(resized_type);
+    HDassert(resized_type_derived);
 
-            /* Set all of the chunk entries' "new_owner" fields */
-            for (; set_begin_index < i; set_begin_index++) {
-                shared_chunks_info_array[set_begin_index].owners.new_owner = new_chunk_owner;
-                shared_chunks_info_array[set_begin_index].num_writers      = num_writers;
-            } /* end for */
+    *contig_type_derived  = FALSE;
+    *resized_type_derived = FALSE;
 
-            num_assigned_chunks_array[new_chunk_owner]++;
-        } /* end for */
+    /* Create struct type for the inner H5F_block_t structure */
+    if (H5F_mpi_get_file_block_type(FALSE, &chunk_block_type, &chunk_block_type_derived) < 0)
+        HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't create derived type for chunk file description")
 
-        /* Sort the new list in order of previous owner so that each original owner of a chunk
-         * entry gets that entry back, with the possibly newly-modified "new_owner" field
-         */
-        if (shared_chunks_info_array_num_entries > 1)
-            HDqsort(shared_chunks_info_array, shared_chunks_info_array_num_entries,
-                    sizeof(H5D_filtered_collective_io_info_t),
-                    H5D__cmp_filtered_collective_io_info_entry_owner);
-
-        send_displacements[0] = 0;
-        for (i = 1; i < (size_t)mpi_size; i++)
-            send_displacements[i] = send_displacements[i - 1] + send_counts[i - 1];
-    } /* end if */
+    field_count = 3;
+    HDassert(field_count == (sizeof(types) / sizeof(MPI_Datatype)));
 
-    /* Scatter the segments of the list back to each process */
-    H5_CHECKED_ASSIGN(scatter_recvcount_int, int,
-                      *local_chunk_array_num_entries * sizeof(H5D_filtered_collective_io_info_t), size_t);
+    /*
+     * Create structure type to pack both chunk H5F_block_t structures
+     * next to chunk_idx field
+     */
+    block_lengths[0] = 1;
+    block_lengths[1] = 1;
+    block_lengths[2] = 1;
+    displacements[0] = offsetof(H5D_chunk_alloc_info_t, chunk_current);
+    displacements[1] = offsetof(H5D_chunk_alloc_info_t, chunk_new);
+    displacements[2] = offsetof(H5D_chunk_alloc_info_t, chunk_idx);
+    types[0]         = chunk_block_type;
+    types[1]         = chunk_block_type;
+    types[2]         = HSIZE_AS_MPI_TYPE;
     if (MPI_SUCCESS !=
-        (mpi_code = MPI_Scatterv(shared_chunks_info_array, send_counts, send_displacements, MPI_BYTE,
-                                 local_chunk_array, scatter_recvcount_int, MPI_BYTE, 0, io_info->comm)))
-        HMPI_GOTO_ERROR(FAIL, "unable to scatter shared chunks info buffer", mpi_code)
+        (mpi_code = MPI_Type_create_struct(field_count, block_lengths, displacements, types, contig_type)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)
+    *contig_type_derived = TRUE;
 
-    if (shared_chunks_info_array) {
-        H5MM_free(shared_chunks_info_array);
-        shared_chunks_info_array = NULL;
-    } /* end if */
+    if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(contig_type)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
 
-    /* Now that the chunks have been redistributed, each process must send its modification data
-     * to the new owners of any of the chunks it previously possessed. Accordingly, each process
-     * must also issue asynchronous receives for any messages it may receive for each of the
-     * chunks it is assigned, in order to avoid potential deadlocking issues.
+    /*
+     * Create struct type to extract the chunk_current, chunk_new and chunk_idx
+     * fields from a H5D_filtered_collective_io_info_t structure
      */
-    if (*local_chunk_array_num_entries)
-        if (NULL == (mod_data = (unsigned char **)H5MM_malloc(*local_chunk_array_num_entries *
-                                                              sizeof(unsigned char *))))
-            HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "unable to allocate modification data buffer array")
-
-    /* Perform all the sends on the chunks that this rank doesn't own */
-    /* (Sends and recvs must be two separate loops, to avoid deadlock) */
-    for (i = 0, last_assigned_idx = 0; i < *local_chunk_array_num_entries; i++) {
-        H5D_filtered_collective_io_info_t *chunk_entry = &local_chunk_array[i];
-
-        if (mpi_rank != chunk_entry->owners.new_owner) {
-            H5D_chunk_info_t *chunk_info = NULL;
-            unsigned char *   mod_data_p = NULL;
-            hsize_t           iter_nelmts;
-            size_t            mod_data_size;
-
-            /* Look up the chunk and get its file and memory dataspaces */
-            if (NULL == (chunk_info = (H5D_chunk_info_t *)H5SL_search(fm->sel_chunks, &chunk_entry->index)))
-                HGOTO_ERROR(H5E_DATASPACE, H5E_NOTFOUND, FAIL, "can't locate chunk in skip list")
+    block_lengths[0] = 1;
+    block_lengths[1] = 1;
+    block_lengths[2] = 1;
+    displacements[0] = offsetof(H5D_filtered_collective_io_info_t, chunk_current);
+    displacements[1] = offsetof(H5D_filtered_collective_io_info_t, chunk_new);
+    displacements[2] = offsetof(H5D_filtered_collective_io_info_t, index_info.chunk_idx);
+    types[0]         = chunk_block_type;
+    types[1]         = chunk_block_type;
+    types[2]         = HSIZE_AS_MPI_TYPE;
+    if (MPI_SUCCESS !=
+        (mpi_code = MPI_Type_create_struct(field_count, block_lengths, displacements, types, &struct_type)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)
+    struct_type_derived = TRUE;
 
-            /* Determine size of serialized chunk file dataspace, plus the size of
-             * the data being written
-             */
-            if (H5S_encode(chunk_info->fspace, &mod_data_p, &mod_data_size) < 0)
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, FAIL, "unable to get encoded dataspace size")
+    if (MPI_SUCCESS != (mpi_code = MPI_Type_create_resized(
+                            struct_type, 0, sizeof(H5D_filtered_collective_io_info_t), resized_type)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_resized failed", mpi_code)
+    *resized_type_derived = TRUE;
 
-            iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->mspace);
+    if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(resized_type)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
 
-            H5_CHECK_OVERFLOW(iter_nelmts, hsize_t, size_t);
-            mod_data_size += (size_t)iter_nelmts * type_info->src_type_size;
+done:
+    if (struct_type_derived) {
+        if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&struct_type)))
+            HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+    }
+    if (chunk_block_type_derived) {
+        if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&chunk_block_type)))
+            HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+    }
 
-            if (NULL == (mod_data[num_send_requests] = (unsigned char *)H5MM_malloc(mod_data_size)))
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL,
-                            "couldn't allocate chunk modification send buffer")
+    if (ret_value < 0) {
+        if (*resized_type_derived) {
+            if (MPI_SUCCESS != (mpi_code = MPI_Type_free(resized_type)))
+                HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+            *resized_type_derived = FALSE;
+        }
+        if (*contig_type_derived) {
+            if (MPI_SUCCESS != (mpi_code = MPI_Type_free(contig_type)))
+                HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+            *contig_type_derived = FALSE;
+        }
+    }
 
-            /* Serialize the chunk's file dataspace into the buffer */
-            mod_data_p = mod_data[num_send_requests];
-            if (H5S_encode(chunk_info->fspace, &mod_data_p, &mod_data_size) < 0)
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTENCODE, FAIL, "unable to encode dataspace")
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5D__mpio_get_chunk_alloc_info_types() */
 
-            /* Initialize iterator for memory selection */
-            if (H5S_select_iter_init(mem_iter, chunk_info->mspace, type_info->src_type_size, 0) < 0)
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL,
-                            "unable to initialize memory selection information")
-            mem_iter_init = TRUE;
+/*-------------------------------------------------------------------------
+ * Function:    H5D__mpio_get_chunk_insert_info_types
+ *
+ * Purpose:     Constructs MPI derived datatypes for communicating the
+ *              information necessary when reinserting chunks into a
+ *              dataset's chunk index. This includes the chunk's new offset
+ *              and size (H5F_block_t) and the inner `index_info` structure
+ *              of a H5D_filtered_collective_io_info_t structure.
+ *
+ *              The datatype returned through `contig_type` has an extent
+ *              equal to the size of an H5D_chunk_insert_info_t structure
+ *              and is suitable for communicating that structure type.
+ *
+ *              The datatype returned through `resized_type` has an extent
+ *              equal to the size of the encompassing
+ *              H5D_filtered_collective_io_info_t structure. This makes it
+ *              suitable for sending an array of
+ *              H5D_filtered_collective_io_info_t structures, while
+ *              extracting out just the information needed during
+ *              communication.
+ *
+ * Return:      Non-negative on success/Negative on failure
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5D__mpio_get_chunk_insert_info_types(MPI_Datatype *contig_type, hbool_t *contig_type_derived,
+                                      MPI_Datatype *resized_type, hbool_t *resized_type_derived)
+{
+    MPI_Datatype struct_type              = MPI_DATATYPE_NULL;
+    hbool_t      struct_type_derived      = FALSE;
+    MPI_Datatype chunk_block_type         = MPI_DATATYPE_NULL;
+    hbool_t      chunk_block_type_derived = FALSE;
+    MPI_Aint     contig_type_extent;
+    MPI_Datatype types[4];
+    MPI_Aint     displacements[4];
+    int          block_lengths[4];
+    int          field_count;
+    int          mpi_code;
+    herr_t       ret_value = SUCCEED;
 
-            /* Collect the modification data into the buffer */
-            if (0 == H5D__gather_mem(io_info->u.wbuf, mem_iter, (size_t)iter_nelmts, mod_data_p))
-                HGOTO_ERROR(H5E_IO, H5E_CANTGATHER, FAIL, "couldn't gather from write buffer")
+    FUNC_ENTER_STATIC
 
-            /* Send modification data to new owner */
-            H5_CHECK_OVERFLOW(mod_data_size, size_t, int)
-            H5_CHECK_OVERFLOW(chunk_entry->index, hsize_t, int)
-            if (MPI_SUCCESS !=
-                (mpi_code = MPI_Isend(mod_data[num_send_requests], (int)mod_data_size, MPI_BYTE,
-                                      chunk_entry->owners.new_owner, (int)chunk_entry->index, io_info->comm,
-                                      &send_requests[num_send_requests])))
-                HMPI_GOTO_ERROR(FAIL, "MPI_Isend failed", mpi_code)
+    HDassert(contig_type);
+    HDassert(contig_type_derived);
+    HDassert(resized_type);
+    HDassert(resized_type_derived);
 
-            if (mem_iter_init && H5S_SELECT_ITER_RELEASE(mem_iter) < 0)
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release memory selection iterator")
-            mem_iter_init = FALSE;
+    *contig_type_derived  = FALSE;
+    *resized_type_derived = FALSE;
 
-            num_send_requests++;
-        } /* end if */
-    }     /* end for */
+    /* Create struct type for an H5F_block_t structure */
+    if (H5F_mpi_get_file_block_type(FALSE, &chunk_block_type, &chunk_block_type_derived) < 0)
+        HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't create derived type for chunk file description")
 
-    /* Perform all the recvs on the chunks this rank owns */
-    for (i = 0, last_assigned_idx = 0; i < *local_chunk_array_num_entries; i++) {
-        H5D_filtered_collective_io_info_t *chunk_entry = &local_chunk_array[i];
+    field_count = 4;
+    HDassert(field_count == (sizeof(types) / sizeof(MPI_Datatype)));
 
-        if (mpi_rank == chunk_entry->owners.new_owner) {
-            /* Allocate all necessary buffers for an asynchronous receive operation */
-            if (chunk_entry->num_writers > 1) {
-                MPI_Message message;
-                MPI_Status  status;
-                size_t      j;
+    /*
+     * Create struct type to pack information into memory as follows:
+     *
+     * Chunk's new Offset/Size (H5F_block_t) ->
+     * Chunk Index Info (H5D_chunk_index_info_t)
+     */
+    block_lengths[0] = 1;
+    block_lengths[1] = 1;
+    block_lengths[2] = 1;
+    block_lengths[3] = 1;
+    displacements[0] = offsetof(H5D_chunk_insert_info_t, chunk_block);
+    displacements[1] = offsetof(H5D_chunk_insert_info_t, index_info.chunk_idx);
+    displacements[2] = offsetof(H5D_chunk_insert_info_t, index_info.filter_mask);
+    displacements[3] = offsetof(H5D_chunk_insert_info_t, index_info.need_insert);
+    types[0]         = chunk_block_type;
+    types[1]         = HSIZE_AS_MPI_TYPE;
+    types[2]         = MPI_UNSIGNED;
+    types[3]         = MPI_C_BOOL;
+    if (MPI_SUCCESS !=
+        (mpi_code = MPI_Type_create_struct(field_count, block_lengths, displacements, types, &struct_type)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)
+    struct_type_derived = TRUE;
 
-                chunk_entry->async_info.num_receive_requests = (int)chunk_entry->num_writers - 1;
-                if (NULL == (chunk_entry->async_info.receive_requests_array = (MPI_Request *)H5MM_malloc(
-                                 (size_t)chunk_entry->async_info.num_receive_requests * sizeof(MPI_Request))))
-                    HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "unable to allocate async requests array")
+    contig_type_extent = (MPI_Aint)sizeof(H5D_chunk_insert_info_t); /* incl. any struct padding */
 
-                if (NULL ==
-                    (chunk_entry->async_info.receive_buffer_array = (unsigned char **)H5MM_malloc(
-                         (size_t)chunk_entry->async_info.num_receive_requests * sizeof(unsigned char *))))
-                    HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "unable to allocate async receive buffers")
+    if (MPI_SUCCESS != (mpi_code = MPI_Type_create_resized(struct_type, 0, contig_type_extent, contig_type)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_resized failed", mpi_code)
+    *contig_type_derived = TRUE;
 
-                for (j = 0; j < chunk_entry->num_writers - 1; j++) {
-                    int count = 0;
+    if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(contig_type)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
 
-                    /* Probe for a particular message from any process, removing that message
-                     * from the receive queue in the process and allocating that much memory
-                     * for the asynchronous receive
-                     */
-                    if (MPI_SUCCESS != (mpi_code = MPI_Mprobe(MPI_ANY_SOURCE, (int)chunk_entry->index,
-                                                              io_info->comm, &message, &status)))
-                        HMPI_GOTO_ERROR(FAIL, "MPI_Mprobe failed", mpi_code)
-
-                    if (MPI_SUCCESS != (mpi_code = MPI_Get_count(&status, MPI_BYTE, &count)))
-                        HMPI_GOTO_ERROR(FAIL, "MPI_Get_count failed", mpi_code)
-
-                    HDassert(count >= 0);
-                    if (NULL == (chunk_entry->async_info.receive_buffer_array[j] =
-                                     (unsigned char *)H5MM_malloc((size_t)count * sizeof(char *))))
-                        HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL,
-                                    "unable to allocate modification data receive buffer")
-
-                    if (MPI_SUCCESS != (mpi_code = MPI_Imrecv(
-                                            chunk_entry->async_info.receive_buffer_array[j], count, MPI_BYTE,
-                                            &message, &chunk_entry->async_info.receive_requests_array[j])))
-                        HMPI_GOTO_ERROR(FAIL, "MPI_Imrecv failed", mpi_code)
-                } /* end for */
-            }     /* end if */
-
-            local_chunk_array[last_assigned_idx++] = local_chunk_array[i];
-        } /* end else */
-    }     /* end for */
+    struct_type_derived = FALSE;
+    if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&struct_type)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
 
-    *local_chunk_array_num_entries = last_assigned_idx;
+    /*
+     * Create struct type to correctly extract all needed
+     * information from a H5D_filtered_collective_io_info_t
+     * structure.
+     */
+    displacements[0] = offsetof(H5D_filtered_collective_io_info_t, chunk_new);
+    displacements[1] = offsetof(H5D_filtered_collective_io_info_t, index_info.chunk_idx);
+    displacements[2] = offsetof(H5D_filtered_collective_io_info_t, index_info.filter_mask);
+    displacements[3] = offsetof(H5D_filtered_collective_io_info_t, index_info.need_insert);
+    if (MPI_SUCCESS !=
+        (mpi_code = MPI_Type_create_struct(field_count, block_lengths, displacements, types, &struct_type)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)
+    struct_type_derived = TRUE;
 
-    /* Wait for all async send requests to complete before returning */
-    if (num_send_requests) {
-        if (NULL == (send_statuses = (MPI_Status *)H5MM_malloc(num_send_requests * sizeof(MPI_Status))))
-            HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate send statuses buffer")
+    if (MPI_SUCCESS != (mpi_code = MPI_Type_create_resized(
+                            struct_type, 0, sizeof(H5D_filtered_collective_io_info_t), resized_type)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_resized failed", mpi_code)
+    *resized_type_derived = TRUE;
 
-        H5_CHECK_OVERFLOW(num_send_requests, size_t, int);
-        if (MPI_SUCCESS != (mpi_code = MPI_Waitall((int)num_send_requests, send_requests, send_statuses)))
-            HMPI_GOTO_ERROR(FAIL, "MPI_Waitall failed", mpi_code)
-    } /* end if */
+    if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(resized_type)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
 
 done:
-    /* Now that all async send requests have completed, free up the send
-     * buffers used in the async operations
-     */
-    for (i = 0; i < num_send_requests; i++) {
-        if (mod_data[i])
-            H5MM_free(mod_data[i]);
-    } /* end for */
+    if (struct_type_derived) {
+        if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&struct_type)))
+            HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+    }
+    if (chunk_block_type_derived) {
+        if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&chunk_block_type)))
+            HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+    }
 
-    if (send_requests)
-        H5MM_free(send_requests);
-    if (send_statuses)
-        H5MM_free(send_statuses);
-    if (send_counts)
-        H5MM_free(send_counts);
-    if (send_displacements)
-        H5MM_free(send_displacements);
-    if (mod_data)
-        H5MM_free(mod_data);
-    if (mem_iter_init && H5S_SELECT_ITER_RELEASE(mem_iter) < 0)
-        HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator")
-    if (mem_iter)
-        H5MM_free(mem_iter);
-    if (num_assigned_chunks_array)
-        H5MM_free(num_assigned_chunks_array);
-    if (shared_chunks_info_array)
-        H5MM_free(shared_chunks_info_array);
+    if (ret_value < 0) {
+        if (*resized_type_derived) {
+            if (MPI_SUCCESS != (mpi_code = MPI_Type_free(resized_type)))
+                HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+            *resized_type_derived = FALSE;
+        }
+        if (*contig_type_derived) {
+            if (MPI_SUCCESS != (mpi_code = MPI_Type_free(contig_type)))
+                HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+            *contig_type_derived = FALSE;
+        }
+    }
 
     FUNC_LEAVE_NOAPI(ret_value)
-} /* end H5D__chunk_redistribute_shared_chunks() */
-#endif
+} /* end H5D__mpio_get_chunk_insert_info_types() */
 
 /*-------------------------------------------------------------------------
- * Function:    H5D__mpio_filtered_collective_write_type
+ * Function:    H5D__mpio_collective_filtered_io_type
  *
  * Purpose:     Constructs a MPI derived datatype for both the memory and
- *              the file for a collective write of filtered chunks. The
- *              datatype contains the offsets in the file and the locations
- *              of the filtered chunk data buffers.
+ *              the file for a collective I/O operation on filtered chunks.
+ *              The datatype contains the chunk offsets and lengths in the
+ *              file and the locations of the chunk data buffers to read
+ *              into/write from.
  *
  * Return:      Non-negative on success/Negative on failure
  *
- * Programmer:  Jordan Henderson
- *              Tuesday, November 22, 2016
- *
  *-------------------------------------------------------------------------
  */
 static herr_t
-H5D__mpio_filtered_collective_write_type(H5D_filtered_collective_io_info_t *chunk_list, size_t num_entries,
-                                         MPI_Datatype *new_mem_type, hbool_t *mem_type_derived,
-                                         MPI_Datatype *new_file_type, hbool_t *file_type_derived)
+H5D__mpio_collective_filtered_io_type(H5D_filtered_collective_io_info_t *chunk_list, size_t num_entries,
+                                      H5D_io_op_type_t op_type, MPI_Datatype *new_mem_type,
+                                      hbool_t *mem_type_derived, MPI_Datatype *new_file_type,
+                                      hbool_t *file_type_derived)
 {
-    MPI_Aint *write_buf_array   = NULL; /* Relative displacements of filtered chunk data buffers */
+    MPI_Aint *io_buf_array      = NULL; /* Relative displacements of filtered chunk data buffers */
     MPI_Aint *file_offset_array = NULL; /* Chunk offsets in the file */
     int *     length_array      = NULL; /* Filtered Chunk lengths */
-    herr_t    ret_value         = SUCCEED;
+    int       mpi_code;
+    herr_t    ret_value = SUCCEED;
 
     FUNC_ENTER_STATIC
 
-    HDassert(chunk_list);
+    HDassert(chunk_list || 0 == num_entries);
     HDassert(new_mem_type);
     HDassert(mem_type_derived);
     HDassert(new_file_type);
     HDassert(file_type_derived);
 
-    if (num_entries > 0) {
-        size_t i;
-        int    mpi_code;
-        void * base_buf;
-
-        H5_CHECK_OVERFLOW(num_entries, size_t, int);
-
-        /* Allocate arrays */
-        if (NULL == (length_array = (int *)H5MM_malloc((size_t)num_entries * sizeof(int))))
-            HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
-                        "memory allocation failed for filtered collective write length array")
-        if (NULL == (write_buf_array = (MPI_Aint *)H5MM_malloc((size_t)num_entries * sizeof(MPI_Aint))))
-            HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
-                        "memory allocation failed for filtered collective write buf length array")
-        if (NULL == (file_offset_array = (MPI_Aint *)H5MM_malloc((size_t)num_entries * sizeof(MPI_Aint))))
-            HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
-                        "memory allocation failed for collective write offset array")
-
-        /* Ensure the list is sorted in ascending order of offset in the file */
-        HDqsort(chunk_list, num_entries, sizeof(H5D_filtered_collective_io_info_t),
-                H5D__cmp_filtered_collective_io_info_entry);
-
-        base_buf = chunk_list[0].buf;
-        for (i = 0; i < num_entries; i++) {
-            /* Set up the offset in the file, the length of the chunk data, and the relative
-             * displacement of the chunk data write buffer
-             */
-            file_offset_array[i] = (MPI_Aint)chunk_list[i].chunk_states.new_chunk.offset;
-            length_array[i]      = (int)chunk_list[i].chunk_states.new_chunk.length;
-            write_buf_array[i]   = (MPI_Aint)chunk_list[i].buf - (MPI_Aint)base_buf;
-        } /* end for */
-
-        /* Create memory MPI type */
-        if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)num_entries, length_array,
-                                                                write_buf_array, MPI_BYTE, new_mem_type)))
-            HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
-        *mem_type_derived = TRUE;
-        if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_mem_type)))
-            HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
-
-        /* Create file MPI type */
-        if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed((int)num_entries, length_array,
-                                                                file_offset_array, MPI_BYTE, new_file_type)))
-            HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
-        *file_type_derived = TRUE;
-        if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_file_type)))
-            HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
-    } /* end if */
-
-done:
-    if (write_buf_array)
-        H5MM_free(write_buf_array);
-    if (file_offset_array)
-        H5MM_free(file_offset_array);
-    if (length_array)
-        H5MM_free(length_array);
-
-    FUNC_LEAVE_NOAPI(ret_value)
-} /* end H5D__mpio_filtered_collective_write_type() */
-
-/*-------------------------------------------------------------------------
- * Function:    H5D__filtered_collective_chunk_entry_io
- *
- * Purpose:     Given an entry for a filtered chunk, performs the necessary
- *              steps for updating the chunk data during a collective
- *              write, or for reading the chunk from file during a
- *              collective read.
- *
- * Return:      Non-negative on success/Negative on failure
- *
- * Programmer:  Jordan Henderson
- *              Wednesday, January 18, 2017
- *
- *-------------------------------------------------------------------------
- */
-static herr_t
-H5D__filtered_collective_chunk_entry_io(H5D_filtered_collective_io_info_t *chunk_entry,
-                                        const H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
-                                        const H5D_chunk_map_t *fm)
-{
-    H5D_chunk_info_t *chunk_info = NULL;
-    H5S_sel_iter_t *  mem_iter   = NULL; /* Memory iterator for H5D__scatter_mem/H5D__gather_mem */
-    H5S_sel_iter_t *  file_iter  = NULL;
-    H5Z_EDC_t         err_detect; /* Error detection info */
-    H5Z_cb_t          filter_cb;  /* I/O filter callback function */
-    unsigned          filter_mask = 0;
-    hsize_t           iter_nelmts; /* Number of points to iterate over for the chunk IO operation */
-    hssize_t          extent_npoints;
-    hsize_t           true_chunk_size;
-    hbool_t           mem_iter_init  = FALSE;
-    hbool_t           file_iter_init = FALSE;
-    size_t            buf_size;
-    size_t            i;
-    H5S_t *           dataspace    = NULL; /* Other process' dataspace for the chunk */
-    void *            tmp_gath_buf = NULL; /* Temporary gather buffer to gather into from application buffer
-                                              before scattering out to the chunk data buffer (when writing data),
-                                              or vice versa (when reading data) */
-    int    mpi_code;
-    herr_t ret_value = SUCCEED;
-
-    FUNC_ENTER_STATIC
-
-    HDassert(chunk_entry);
-    HDassert(io_info);
-    HDassert(type_info);
-    HDassert(fm);
-
-    /* Retrieve filter settings from API context */
-    if (H5CX_get_err_detect(&err_detect) < 0)
-        HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get error detection info")
-    if (H5CX_get_filter_cb(&filter_cb) < 0)
-        HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get I/O filter callback function")
-
-    /* Look up the chunk and get its file and memory dataspaces */
-    if (NULL == (chunk_info = (H5D_chunk_info_t *)H5SL_search(fm->sel_chunks, &chunk_entry->index)))
-        HGOTO_ERROR(H5E_DATASPACE, H5E_NOTFOUND, FAIL, "can't locate chunk in skip list")
-
-    if ((extent_npoints = H5S_GET_EXTENT_NPOINTS(chunk_info->fspace)) < 0)
-        HGOTO_ERROR(H5E_DATASET, H5E_CANTCOUNT, FAIL, "dataspace is invalid")
-    true_chunk_size = (hsize_t)extent_npoints * type_info->src_type_size;
-
-    /* If the size of the filtered chunk is larger than the number of points in the
-     * chunk file space extent times the datatype size, allocate enough space to hold the
-     * whole filtered chunk. Otherwise, allocate a buffer equal to the size of the
-     * chunk so that the unfiltering operation doesn't have to grow the buffer.
-     */
-    buf_size = MAX(chunk_entry->chunk_states.chunk_current.length, true_chunk_size);
+    *mem_type_derived  = FALSE;
+    *file_type_derived = FALSE;
+    *new_mem_type      = MPI_BYTE;
+    *new_file_type     = MPI_BYTE;
 
-    if (NULL == (chunk_entry->buf = H5MM_malloc(buf_size)))
-        HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate chunk data buffer")
+    if (num_entries > 0) {
+        H5F_block_t *chunk_block;
+        size_t       last_valid_idx = 0;
+        size_t       i;
+        int          chunk_count;
 
-    /* If this is not a full chunk overwrite or this is a read operation, the chunk must be
-     * read from the file and unfiltered.
-     */
-    if (!chunk_entry->full_overwrite || io_info->op_type == H5D_IO_OP_READ) {
-        H5FD_mpio_xfer_t xfer_mode; /* Parallel transfer for this request */
+        /*
+         * Determine number of chunks for I/O operation and
+         * setup for derived datatype creation if I/O operation
+         * includes multiple chunks
+         */
+        if (num_entries == 1) {
+            /* Set last valid index to 0 for contiguous datatype creation */
+            last_valid_idx = 0;
 
-        chunk_entry->chunk_states.new_chunk.length = chunk_entry->chunk_states.chunk_current.length;
+            if (op_type == H5D_IO_OP_WRITE)
+                chunk_count = 1;
+            else
+                chunk_count = chunk_list[0].need_read ? 1 : 0;
+        }
+        else {
+            MPI_Aint chunk_buf;
+            MPI_Aint base_buf;
+            haddr_t  base_offset = HADDR_UNDEF;
+
+            H5_CHECK_OVERFLOW(num_entries, size_t, int);
+
+            /* Allocate arrays */
+            if (NULL == (length_array = H5MM_malloc((size_t)num_entries * sizeof(int))))
+                HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+                            "memory allocation failed for filtered collective I/O length array")
+            if (NULL == (io_buf_array = H5MM_malloc((size_t)num_entries * sizeof(MPI_Aint))))
+                HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+                            "memory allocation failed for filtered collective I/O buf length array")
+            if (NULL == (file_offset_array = H5MM_malloc((size_t)num_entries * sizeof(MPI_Aint))))
+                HGOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+                            "memory allocation failed for filtered collective I/O offset array")
 
-        /* Currently, these chunk reads are done independently and will likely
-         * cause issues with collective metadata reads enabled. In the future,
-         * this should be refactored to use collective chunk reads - JTH */
+            /*
+             * If doing a write, we can set the base chunk offset
+             * and base chunk data buffer right away.
+             *
+             * If doing a read, some chunks may be skipped over
+             * for reading if they aren't yet allocated in the
+             * file. Therefore, we have to find the first chunk
+             * actually being read in order to set the base chunk
+             * offset and base chunk data buffer.
+             */
+            if (op_type == H5D_IO_OP_WRITE) {
+#if MPI_VERSION >= 3
+                if (MPI_SUCCESS != (mpi_code = MPI_Get_address(chunk_list[0].buf, &base_buf)))
+                    HMPI_GOTO_ERROR(FAIL, "MPI_Get_address failed", mpi_code)
+#else
+                base_buf = (MPI_Aint)chunk_list[0].buf;
+#endif
 
-        /* Get the original state of parallel I/O transfer mode */
-        if (H5CX_get_io_xfer_mode(&xfer_mode) < 0)
-            HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get MPI-I/O transfer mode")
+                base_offset = chunk_list[0].chunk_new.offset;
+            }
 
-        /* Change the xfer_mode to independent for handling the I/O */
-        if (H5CX_set_io_xfer_mode(H5FD_MPIO_INDEPENDENT) < 0)
-            HGOTO_ERROR(H5E_DATASET, H5E_CANTSET, FAIL, "can't set MPI-I/O transfer mode")
+            for (i = 0, chunk_count = 0; i < num_entries; i++) {
+                if (op_type == H5D_IO_OP_READ) {
+                    /*
+                     * If this chunk isn't being read, don't add it
+                     * to the MPI type we're building up for I/O
+                     */
+                    if (!chunk_list[i].need_read)
+                        continue;
 
-        if (H5F_shared_block_read(io_info->f_sh, H5FD_MEM_DRAW,
-                                  chunk_entry->chunk_states.chunk_current.offset,
-                                  chunk_entry->chunk_states.new_chunk.length, chunk_entry->buf) < 0)
-            HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "unable to read raw data chunk")
+                    /*
+                     * If this chunk is being read, go ahead and
+                     * set the base chunk offset and base chunk
+                     * data buffer if we haven't already
+                     */
+                    if (!H5F_addr_defined(base_offset)) {
+#if MPI_VERSION >= 3
+                        if (MPI_SUCCESS != (mpi_code = MPI_Get_address(chunk_list[i].buf, &base_buf)))
+                            HMPI_GOTO_ERROR(FAIL, "MPI_Get_address failed", mpi_code)
+#else
+                        base_buf = (MPI_Aint)chunk_list[i].buf;
+#endif
 
-        /* Return to the original I/O transfer mode setting */
-        if (H5CX_set_io_xfer_mode(xfer_mode) < 0)
-            HGOTO_ERROR(H5E_DATASET, H5E_CANTSET, FAIL, "can't set MPI-I/O transfer mode")
+                        base_offset = chunk_list[i].chunk_current.offset;
+                    }
+                }
 
-        if (H5Z_pipeline(&io_info->dset->shared->dcpl_cache.pline, H5Z_FLAG_REVERSE, &filter_mask, err_detect,
-                         filter_cb, (size_t *)&chunk_entry->chunk_states.new_chunk.length, &buf_size,
-                         &chunk_entry->buf) < 0)
-            HGOTO_ERROR(H5E_DATASET, H5E_CANTFILTER, FAIL, "couldn't unfilter chunk for modifying")
-    } /* end if */
-    else {
-        chunk_entry->chunk_states.new_chunk.length = true_chunk_size;
-    } /* end else */
+                /* Set convenience pointer for current chunk block */
+                chunk_block =
+                    (op_type == H5D_IO_OP_READ) ? &chunk_list[i].chunk_current : &chunk_list[i].chunk_new;
 
-    /* Initialize iterator for memory selection */
-    if (NULL == (mem_iter = (H5S_sel_iter_t *)H5MM_malloc(sizeof(H5S_sel_iter_t))))
-        HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate memory iterator")
+                /*
+                 * Set the current chunk entry's offset in the file, relative to
+                 * the first chunk entry
+                 */
+                HDassert(H5F_addr_defined(chunk_block->offset));
+                file_offset_array[chunk_count] = (MPI_Aint)(chunk_block->offset - base_offset);
 
-    if (H5S_select_iter_init(mem_iter, chunk_info->mspace, type_info->src_type_size, 0) < 0)
-        HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to initialize memory selection information")
-    mem_iter_init = TRUE;
+                /*
+                 * Ensure the chunk list is sorted in ascending ordering of
+                 * offset in the file
+                 */
+                if (chunk_count)
+                    HDassert(file_offset_array[chunk_count] > file_offset_array[chunk_count - 1]);
 
-    /* If this is a read operation, scatter the read chunk data to the user's buffer.
-     *
-     * If this is a write operation, update the chunk data buffer with the modifications
-     * from the current process, then apply any modifications from other processes. Finally,
-     * filter the newly-updated chunk.
-     */
-    switch (io_info->op_type) {
-        case H5D_IO_OP_READ:
-            if (NULL == (file_iter = (H5S_sel_iter_t *)H5MM_malloc(sizeof(H5S_sel_iter_t))))
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate file iterator")
+                /* Set the current chunk entry's size for the I/O operation */
+                H5_CHECK_OVERFLOW(chunk_block->length, hsize_t, int);
+                length_array[chunk_count] = (int)chunk_block->length;
 
-            if (H5S_select_iter_init(file_iter, chunk_info->fspace, type_info->src_type_size, 0) < 0)
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL,
-                            "unable to initialize memory selection information")
-            file_iter_init = TRUE;
+                /*
+                 * Set the displacement of the chunk entry's chunk data buffer,
+                 * relative to the first entry's data buffer
+                 */
+#if MPI_VERSION >= 3 && MPI_SUBVERSION >= 1
+                if (MPI_SUCCESS != (mpi_code = MPI_Get_address(chunk_list[i].buf, &chunk_buf)))
+                    HMPI_GOTO_ERROR(FAIL, "MPI_Get_address failed", mpi_code)
 
-            iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->fspace);
+                io_buf_array[chunk_count] = MPI_Aint_diff(chunk_buf, base_buf);
+#else
+                chunk_buf                 = (MPI_Aint)chunk_list[i].buf;
+                io_buf_array[chunk_count] = chunk_buf - base_buf;
+#endif
 
-            if (NULL == (tmp_gath_buf = H5MM_malloc(iter_nelmts * type_info->src_type_size)))
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate temporary gather buffer")
+                /*
+                 * Set last valid index in case only a single chunk will
+                 * be involved in the I/O operation
+                 */
+                last_valid_idx = i;
 
-            if (!H5D__gather_mem(chunk_entry->buf, file_iter, (size_t)iter_nelmts, tmp_gath_buf))
-                HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "couldn't gather from chunk buffer")
+                chunk_count++;
+            } /* end for */
+        }
 
-            iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->mspace);
+        /*
+         * Create derived datatypes for the chunk list if this
+         * rank has any chunks to work on
+         */
+        if (chunk_count > 0) {
+            if (chunk_count == 1) {
+                int chunk_len;
 
-            if (H5D__scatter_mem(tmp_gath_buf, mem_iter, (size_t)iter_nelmts, io_info->u.rbuf) < 0)
-                HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't scatter to read buffer")
+                /* Single chunk - use a contiguous type for both memory and file */
 
-            break;
+                /* Ensure that we can cast chunk size to an int for MPI */
+                chunk_block = (op_type == H5D_IO_OP_READ) ? &chunk_list[last_valid_idx].chunk_current
+                                                          : &chunk_list[last_valid_idx].chunk_new;
+                H5_CHECKED_ASSIGN(chunk_len, int, chunk_block->length, hsize_t);
 
-        case H5D_IO_OP_WRITE:
-            iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->mspace);
+                if (MPI_SUCCESS != (mpi_code = MPI_Type_contiguous(chunk_len, MPI_BYTE, new_file_type)))
+                    HMPI_GOTO_ERROR(FAIL, "MPI_Type_contiguous failed", mpi_code)
+                *new_mem_type = *new_file_type;
 
-            if (NULL == (tmp_gath_buf = H5MM_malloc(iter_nelmts * type_info->src_type_size)))
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate temporary gather buffer")
+                /*
+                 * Since we use the same datatype for both memory and file, only
+                 * mark the file type as derived so the caller doesn't try to
+                 * free the same type twice
+                 */
+                *mem_type_derived  = FALSE;
+                *file_type_derived = TRUE;
 
-            /* Gather modification data from the application write buffer into a temporary buffer */
-            if (0 == H5D__gather_mem(io_info->u.wbuf, mem_iter, (size_t)iter_nelmts, tmp_gath_buf))
-                HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "couldn't gather from write buffer")
+                if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_file_type)))
+                    HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
+            }
+            else {
+                HDassert(file_offset_array);
+                HDassert(length_array);
+                HDassert(io_buf_array);
 
-            if (H5S_SELECT_ITER_RELEASE(mem_iter) < 0)
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator")
-            mem_iter_init = FALSE;
+                /* Multiple chunks - use an hindexed type for both memory and file */
 
-            /* Initialize iterator for file selection */
-            if (H5S_select_iter_init(mem_iter, chunk_info->fspace, type_info->dst_type_size, 0) < 0)
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL,
-                            "unable to initialize file selection information")
-            mem_iter_init = TRUE;
+                /* Create memory MPI type */
+                if (MPI_SUCCESS != (mpi_code = MPI_Type_create_hindexed(
+                                        chunk_count, length_array, io_buf_array, MPI_BYTE, new_mem_type)))
+                    HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
+                *mem_type_derived = TRUE;
 
-            iter_nelmts = H5S_GET_SELECT_NPOINTS(chunk_info->fspace);
+                if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_mem_type)))
+                    HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
 
-            /* Scatter the owner's modification data into the chunk data buffer according to
-             * the file space.
-             */
-            if (H5D__scatter_mem(tmp_gath_buf, mem_iter, (size_t)iter_nelmts, chunk_entry->buf) < 0)
-                HGOTO_ERROR(H5E_DATASET, H5E_READERROR, FAIL, "couldn't scatter to chunk data buffer")
+                /* Create file MPI type */
+                if (MPI_SUCCESS !=
+                    (mpi_code = MPI_Type_create_hindexed(chunk_count, length_array, file_offset_array,
+                                                         MPI_BYTE, new_file_type)))
+                    HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_hindexed failed", mpi_code)
+                *file_type_derived = TRUE;
 
-            if (H5S_SELECT_ITER_RELEASE(mem_iter) < 0)
-                HGOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator")
-            mem_iter_init = FALSE;
+                if (MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_file_type)))
+                    HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
+            }
+        }
+    } /* end if */
 
-            if (MPI_SUCCESS !=
-                (mpi_code = MPI_Waitall(chunk_entry->async_info.num_receive_requests,
-                                        chunk_entry->async_info.receive_requests_array, MPI_STATUSES_IGNORE)))
-                HMPI_GOTO_ERROR(FAIL, "MPI_Waitall failed", mpi_code)
+done:
+    if (file_offset_array)
+        H5MM_free(file_offset_array);
+    if (io_buf_array)
+        H5MM_free(io_buf_array);
+    if (length_array)
+        H5MM_free(length_array);
 
-            /* For each asynchronous receive call previously posted, receive the chunk modification
-             * buffer from another rank and update the chunk data
-             */
-            for (i = 0; i < (size_t)chunk_entry->async_info.num_receive_requests; i++) {
-                const unsigned char *mod_data_p;
+    if (ret_value < 0) {
+        if (*file_type_derived) {
+            if (MPI_SUCCESS != (mpi_code = MPI_Type_free(new_file_type)))
+                HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+            *file_type_derived = FALSE;
+        }
+        if (*mem_type_derived) {
+            if (MPI_SUCCESS != (mpi_code = MPI_Type_free(new_mem_type)))
+                HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+            *mem_type_derived = FALSE;
+        }
+    }
 
-                /* Decode the process' chunk file dataspace */
-                mod_data_p = chunk_entry->async_info.receive_buffer_array[i];
-                if (NULL == (dataspace = H5S_decode(&mod_data_p)))
-                    HGOTO_ERROR(H5E_DATASET, H5E_CANTDECODE, FAIL, "unable to decode dataspace")
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5D__mpio_collective_filtered_io_type() */
 
-                if (H5S_select_iter_init(mem_iter, dataspace, type_info->dst_type_size, 0) < 0)
-                    HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL,
-                                "unable to initialize memory selection information")
-                mem_iter_init = TRUE;
+#ifdef H5Dmpio_DEBUG
 
-                iter_nelmts = H5S_GET_SELECT_NPOINTS(dataspace);
+static herr_t
+H5D__mpio_dump_collective_filtered_chunk_list(H5D_filtered_collective_io_info_t *chunk_list,
+                                              size_t chunk_list_num_entries, int mpi_rank)
+{ /* Debug helper: prints every field of each chunk list entry through the H5D_MPIO_DEBUG macros */
+    H5D_filtered_collective_io_info_t *chunk_entry;           /* Entry currently being printed */
+    size_t                             i;                     /* Index into chunk_list */
+    herr_t                             ret_value = SUCCEED;   /* Always SUCCEED; no failure path below */
 
-                /* Update the chunk data with the received modification data */
-                if (H5D__scatter_mem(mod_data_p, mem_iter, (size_t)iter_nelmts, chunk_entry->buf) < 0)
-                    HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "couldn't scatter to write buffer")
+    FUNC_ENTER_STATIC_NOERR
 
-                if (H5S_SELECT_ITER_RELEASE(mem_iter) < 0)
-                    HGOTO_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator")
-                mem_iter_init = FALSE;
-                if (dataspace) {
-                    if (H5S_close(dataspace) < 0)
-                        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTFREE, FAIL, "can't close dataspace")
-                    dataspace = NULL;
-                }
-                H5MM_free(chunk_entry->async_info.receive_buffer_array[i]);
-            } /* end for */
+    H5D_MPIO_DEBUG(mpi_rank, "CHUNK LIST: [");
+    for (i = 0; i < chunk_list_num_entries; i++) {
+        unsigned chunk_rank; /* Number of dimensions of the chunk's file dataspace */
+
+        chunk_entry = &chunk_list[i];
+
+        HDassert(chunk_entry->chunk_info);
+        chunk_rank = (unsigned)H5S_GET_EXTENT_NDIMS(chunk_entry->chunk_info->fspace);
+
+        H5D_MPIO_DEBUG(mpi_rank, " {");
+        H5D_MPIO_DEBUG_VA(mpi_rank, "   - Entry %zu -", i);
+
+        H5D_MPIO_DEBUG(mpi_rank, "   - Chunk Fspace Info -"); /* lengths are hsize_t, hence PRIuHSIZE */
+        H5D_MPIO_DEBUG_VA(mpi_rank,
+                          "     Chunk Current Info: { Offset: %" PRIuHADDR ", Length: %" PRIuHSIZE " }",
+                          chunk_entry->chunk_current.offset, chunk_entry->chunk_current.length);
+        H5D_MPIO_DEBUG_VA(mpi_rank, "     Chunk New Info: { Offset: %" PRIuHADDR ", Length: %" PRIuHSIZE " }",
+                          chunk_entry->chunk_new.offset, chunk_entry->chunk_new.length);
+
+        H5D_MPIO_DEBUG(mpi_rank, "   - Chunk Insert Info -"); /* coords past chunk_rank print as 0 */
+        H5D_MPIO_DEBUG_VA(mpi_rank,
+                          "     Chunk Scaled Coords (4-d): { %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE
+                          ", %" PRIuHSIZE " }",
+                          chunk_rank < 1 ? 0 : chunk_entry->chunk_info->scaled[0],
+                          chunk_rank < 2 ? 0 : chunk_entry->chunk_info->scaled[1],
+                          chunk_rank < 3 ? 0 : chunk_entry->chunk_info->scaled[2],
+                          chunk_rank < 4 ? 0 : chunk_entry->chunk_info->scaled[3]);
+        H5D_MPIO_DEBUG_VA(mpi_rank, "     Chunk Index: %" PRIuHSIZE, chunk_entry->index_info.chunk_idx);
+        H5D_MPIO_DEBUG_VA(mpi_rank, "     Filter Mask: %u", chunk_entry->index_info.filter_mask);
+        H5D_MPIO_DEBUG_VA(mpi_rank, "     Need Insert: %s",
+                          chunk_entry->index_info.need_insert ? "YES" : "NO");
+
+        H5D_MPIO_DEBUG(mpi_rank, "   - Other Info -");
+        H5D_MPIO_DEBUG_VA(mpi_rank, "     Chunk Info Ptr: %p", (void *)chunk_entry->chunk_info);
+        H5D_MPIO_DEBUG_VA(mpi_rank, "     Need Read: %s", chunk_entry->need_read ? "YES" : "NO");
+        H5D_MPIO_DEBUG_VA(mpi_rank, "     Chunk I/O Size: %zu", chunk_entry->io_size);
+        H5D_MPIO_DEBUG_VA(mpi_rank, "     Chunk Buffer Size: %zu", chunk_entry->chunk_buf_size);
+        H5D_MPIO_DEBUG_VA(mpi_rank, "     Original Owner: %d", chunk_entry->orig_owner);
+        H5D_MPIO_DEBUG_VA(mpi_rank, "     New Owner: %d", chunk_entry->new_owner);
+        H5D_MPIO_DEBUG_VA(mpi_rank, "     # of Writers: %d", chunk_entry->num_writers);
+        H5D_MPIO_DEBUG_VA(mpi_rank, "     Chunk Data Buffer Ptr: %p", (void *)chunk_entry->buf);
+
+        H5D_MPIO_DEBUG(mpi_rank, " }");
+    }
+    H5D_MPIO_DEBUG(mpi_rank, "]");
 
-            /* Filter the chunk */
-            if (H5Z_pipeline(&io_info->dset->shared->dcpl_cache.pline, 0, &filter_mask, err_detect, filter_cb,
-                             (size_t *)&chunk_entry->chunk_states.new_chunk.length, &buf_size,
-                             &chunk_entry->buf) < 0)
-                HGOTO_ERROR(H5E_PLINE, H5E_CANTFILTER, FAIL, "output pipeline failed")
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5D__mpio_dump_collective_filtered_chunk_list() */
 
-#if H5_SIZEOF_SIZE_T > 4
-            /* Check for the chunk expanding too much to encode in a 32-bit value */
-            if (chunk_entry->chunk_states.new_chunk.length > ((size_t)0xffffffff))
-                HGOTO_ERROR(H5E_DATASET, H5E_BADRANGE, FAIL, "chunk too large for 32-bit length")
 #endif
-            break;
-
-        default:
-            HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "invalid I/O operation")
-    } /* end switch */
-
-done:
-    if (chunk_entry->async_info.receive_buffer_array)
-        H5MM_free(chunk_entry->async_info.receive_buffer_array);
-    if (chunk_entry->async_info.receive_requests_array)
-        H5MM_free(chunk_entry->async_info.receive_requests_array);
-    if (tmp_gath_buf)
-        H5MM_free(tmp_gath_buf);
-    if (file_iter_init && H5S_SELECT_ITER_RELEASE(file_iter) < 0)
-        HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator")
-    if (file_iter)
-        H5MM_free(file_iter);
-    if (mem_iter_init && H5S_SELECT_ITER_RELEASE(mem_iter) < 0)
-        HDONE_ERROR(H5E_DATASET, H5E_CANTFREE, FAIL, "couldn't release selection iterator")
-    if (mem_iter)
-        H5MM_free(mem_iter);
-    if (dataspace)
-        if (H5S_close(dataspace) < 0)
-            HDONE_ERROR(H5E_DATASPACE, H5E_CANTFREE, FAIL, "can't close dataspace")
 
-    FUNC_LEAVE_NOAPI(ret_value)
-} /* end H5D__filtered_collective_chunk_entry_io() */
 #endif /* H5_HAVE_PARALLEL */
diff --git a/src/H5Dpkg.h b/src/H5Dpkg.h
index 49c95a5..a424929 100644
--- a/src/H5Dpkg.h
+++ b/src/H5Dpkg.h
@@ -559,6 +559,7 @@ H5_DLL herr_t  H5D__alloc_storage(const H5D_io_info_t *io_info, H5D_time_alloc_t
                                   hbool_t full_overwrite, hsize_t old_dim[]);
 H5_DLL herr_t  H5D__get_storage_size(const H5D_t *dset, hsize_t *storage_size);
 H5_DLL herr_t  H5D__get_chunk_storage_size(H5D_t *dset, const hsize_t *offset, hsize_t *storage_size);
+H5_DLL herr_t  H5D__chunk_index_empty(const H5D_t *dset, hbool_t *empty);
 H5_DLL herr_t  H5D__get_num_chunks(const H5D_t *dset, const H5S_t *space, hsize_t *nchunks);
 H5_DLL herr_t  H5D__get_chunk_info(const H5D_t *dset, const H5S_t *space, hsize_t chk_idx, hsize_t *coord,
                                    unsigned *filter_mask, haddr_t *offset, hsize_t *size);
@@ -591,6 +592,10 @@ H5_DLL herr_t H5D__select_read(const H5D_io_info_t *io_info, const H5D_type_info
 H5_DLL herr_t H5D__select_write(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
                                 hsize_t nelmts, H5S_t *file_space, H5S_t *mem_space);
 
+/* Functions that perform direct copying between memory buffers */
+H5_DLL herr_t H5D_select_io_mem(void *dst_buf, const H5S_t *dst_space, const void *src_buf,
+                                const H5S_t *src_space, size_t elmt_size, size_t nelmts);
+
 /* Functions that perform scatter-gather serial I/O operations */
 H5_DLL herr_t H5D__scatter_mem(const void *_tscat_buf, H5S_sel_iter_t *iter, size_t nelmts, void *_buf);
 H5_DLL size_t H5D__gather_mem(const void *_buf, H5S_sel_iter_t *iter, size_t nelmts,
@@ -635,7 +640,13 @@ H5_DLL herr_t  H5D__chunk_allocate(const H5D_io_info_t *io_info, hbool_t full_ov
                                    const hsize_t old_dim[]);
 H5_DLL herr_t  H5D__chunk_file_alloc(const H5D_chk_idx_info_t *idx_info, const H5F_block_t *old_chunk,
                                      H5F_block_t *new_chunk, hbool_t *need_insert, const hsize_t *scaled);
+H5_DLL void *  H5D__chunk_mem_alloc(size_t size, const H5O_pline_t *pline);
+H5_DLL void    H5D__chunk_mem_free(void *chk, const void *_pline);
+H5_DLL void *  H5D__chunk_mem_xfree(void *chk, const void *pline);
+H5_DLL void *  H5D__chunk_mem_realloc(void *chk, size_t size, const H5O_pline_t *pline);
 H5_DLL herr_t  H5D__chunk_update_old_edge_chunks(H5D_t *dset, hsize_t old_dim[]);
+H5_DLL hbool_t H5D__chunk_is_partial_edge_chunk(unsigned dset_ndims, const uint32_t *chunk_dims,
+                                                const hsize_t *chunk_scaled, const hsize_t *dset_dims);
 H5_DLL herr_t  H5D__chunk_prune_by_extent(H5D_t *dset, const hsize_t *old_dim);
 H5_DLL herr_t  H5D__chunk_set_sizes(H5D_t *dset);
 #ifdef H5_HAVE_PARALLEL
@@ -694,11 +705,11 @@ H5_DLL herr_t H5D__fill_term(H5D_fill_buf_info_t *fb_info);
 
 #ifdef H5_HAVE_PARALLEL
 
-#ifdef H5S_DEBUG
+#ifdef H5D_DEBUG
 #ifndef H5Dmpio_DEBUG
 #define H5Dmpio_DEBUG
 #endif /*H5Dmpio_DEBUG*/
-#endif /*H5S_DEBUG*/
+#endif /*H5D_DEBUG*/
 /* MPI-IO function to read, it will select either regular or irregular read */
 H5_DLL herr_t H5D__mpio_select_read(const H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
                                     hsize_t nelmts, H5S_t *file_space, H5S_t *mem_space);
@@ -727,6 +738,8 @@ H5_DLL herr_t H5D__chunk_collective_write(H5D_io_info_t *io_info, const H5D_type
  * memory and the file */
 H5_DLL htri_t H5D__mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *file_space,
                                      const H5S_t *mem_space, const H5D_type_info_t *type_info);
+H5_DLL herr_t H5D__mpio_get_no_coll_cause_strings(char *local_cause, size_t local_cause_len,
+                                                  char *global_cause, size_t global_cause_len);
 
 #endif /* H5_HAVE_PARALLEL */
 
diff --git a/src/H5Dselect.c b/src/H5Dselect.c
index e64d657..f464ca5 100644
--- a/src/H5Dselect.c
+++ b/src/H5Dselect.c
@@ -105,6 +105,9 @@ H5D__select_io(const H5D_io_info_t *io_info, size_t elmt_size, size_t nelmts, H5
     HDassert(io_info->store);
     HDassert(io_info->u.rbuf);
 
+    if (elmt_size == 0)
+        HGOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, FAIL, "invalid elmt_size of 0")
+
     /* Check for only one element in selection */
     if (nelmts == 1) {
         hsize_t single_mem_off;  /* Offset in memory */
@@ -226,8 +229,6 @@ H5D__select_io(const H5D_io_info_t *io_info, size_t elmt_size, size_t nelmts, H5
 
             /* Decrement number of elements left to process */
             HDassert(((size_t)tmp_file_len % elmt_size) == 0);
-            if (elmt_size == 0)
-                HGOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, FAIL, "Resulted in division by zero")
             nelmts -= ((size_t)tmp_file_len / elmt_size);
         } /* end while */
     }     /* end else */
@@ -257,6 +258,188 @@ done:
 } /* end H5D__select_io() */
 
 /*-------------------------------------------------------------------------
+ * Function:    H5D_select_io_mem
+ *
+ * Purpose:     Copy `nelmts` elements of `elmt_size` bytes each directly
+ *              from `src_buf` to `dst_buf`, following the selections in
+ *              the `src_space` and `dst_space` dataspaces.
+ *
+ * Note:        This routine is essentially the same as H5D__select_io,
+ *              except that the readvv/writevv calls are replaced with
+ *              H5VM_memcpyvv calls. Changes should be made to both
+ *              routines.
+ *
+ * Return:      Non-negative on success/Negative on failure
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5D_select_io_mem(void *dst_buf, const H5S_t *dst_space, const void *src_buf, const H5S_t *src_space,
+                  size_t elmt_size, size_t nelmts)
+{
+    H5S_sel_iter_t *dst_sel_iter      = NULL;  /* Destination dataspace iteration info */
+    H5S_sel_iter_t *src_sel_iter      = NULL;  /* Source dataspace iteration info */
+    hbool_t         dst_sel_iter_init = FALSE; /* Destination dataspace selection iterator initialized? */
+    hbool_t         src_sel_iter_init = FALSE; /* Source dataspace selection iterator initialized? */
+    hsize_t *       dst_off           = NULL;  /* Pointer to sequence offsets in destination buffer */
+    hsize_t *       src_off           = NULL;  /* Pointer to sequence offsets in source buffer */
+    size_t *        dst_len           = NULL;  /* Pointer to sequence lengths in destination buffer */
+    size_t *        src_len           = NULL;  /* Pointer to sequence lengths in source buffer */
+    size_t          curr_dst_seq;              /* Current destination buffer sequence to operate on */
+    size_t          curr_src_seq;              /* Current source buffer sequence to operate on */
+    size_t          dst_nseq;                  /* Number of sequences generated for destination buffer */
+    size_t          src_nseq;                  /* Number of sequences generated for source buffer */
+    size_t          dxpl_vec_size;             /* Vector length from API context's DXPL */
+    size_t          vec_size;                  /* Vector length */
+    ssize_t         bytes_copied;              /* Bytes moved by the most recent H5VM_memcpyvv call */
+    herr_t          ret_value = SUCCEED;       /* Return value */
+
+    FUNC_ENTER_NOAPI(FAIL)
+
+    HDassert(dst_buf);
+    HDassert(dst_space);
+    HDassert(src_buf);
+    HDassert(src_space);
+
+    if (elmt_size == 0) /* elmt_size is used as a divisor when counting the elements copied */
+        HGOTO_ERROR(H5E_DATASPACE, H5E_BADVALUE, FAIL, "invalid elmt_size of 0")
+
+    /* Single-element case: one offset per dataspace, no iterators or vector arrays needed */
+    if (nelmts == 1) {
+        hsize_t single_dst_off; /* Offset in dst_space */
+        hsize_t single_src_off; /* Offset in src_space */
+        size_t  single_dst_len; /* Length in dst_space */
+        size_t  single_src_len; /* Length in src_space */
+
+        /* Get offset of first element in selections */
+        if (H5S_SELECT_OFFSET(dst_space, &single_dst_off) < 0)
+            HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve destination selection offset")
+        if (H5S_SELECT_OFFSET(src_space, &single_src_off) < 0)
+            HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve source selection offset")
+
+        /* Describe one sequence of elmt_size bytes at each (byte) offset */
+        dst_nseq = src_nseq = 1;
+        curr_dst_seq = curr_src_seq = 0;
+        single_dst_off *= elmt_size;
+        single_src_off *= elmt_size;
+        single_dst_len = single_src_len = elmt_size;
+
+        /* Perform vectorized memcpy from src_buf to dst_buf */
+        if ((bytes_copied =
+                 H5VM_memcpyvv(dst_buf, dst_nseq, &curr_dst_seq, &single_dst_len, &single_dst_off, src_buf,
+                               src_nseq, &curr_src_seq, &single_src_len, &single_src_off)) < 0)
+            HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "vectorized memcpy failed")
+
+        HDassert(((size_t)bytes_copied % elmt_size) == 0);
+    }
+    else {
+        unsigned sel_iter_flags = H5S_SEL_ITER_GET_SEQ_LIST_SORTED | H5S_SEL_ITER_SHARE_WITH_DATASPACE;
+        size_t   dst_nelem; /* Number of elements used in destination buffer sequences */
+        size_t   src_nelem; /* Number of elements used in source buffer sequences */
+
+        /* Get info from API context */
+        if (H5CX_get_vec_size(&dxpl_vec_size) < 0)
+            HGOTO_ERROR(H5E_IO, H5E_CANTGET, FAIL, "can't retrieve I/O vector size")
+
+        /* Allocate the vector arrays, sized to the larger of the DXPL and default lengths */
+        if (dxpl_vec_size > H5D_IO_VECTOR_SIZE)
+            vec_size = dxpl_vec_size;
+        else
+            vec_size = H5D_IO_VECTOR_SIZE;
+
+        if (NULL == (dst_len = H5FL_SEQ_MALLOC(size_t, vec_size)))
+            HGOTO_ERROR(H5E_IO, H5E_CANTALLOC, FAIL, "can't allocate I/O length vector array")
+        if (NULL == (dst_off = H5FL_SEQ_MALLOC(hsize_t, vec_size)))
+            HGOTO_ERROR(H5E_IO, H5E_CANTALLOC, FAIL, "can't allocate I/O offset vector array")
+        if (NULL == (src_len = H5FL_SEQ_MALLOC(size_t, vec_size)))
+            HGOTO_ERROR(H5E_IO, H5E_CANTALLOC, FAIL, "can't allocate I/O length vector array")
+        if (NULL == (src_off = H5FL_SEQ_MALLOC(hsize_t, vec_size)))
+            HGOTO_ERROR(H5E_IO, H5E_CANTALLOC, FAIL, "can't allocate I/O offset vector array")
+
+        /* Allocate the dataspace selection iterators */
+        if (NULL == (dst_sel_iter = H5FL_MALLOC(H5S_sel_iter_t)))
+            HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate destination selection iterator")
+        if (NULL == (src_sel_iter = H5FL_MALLOC(H5S_sel_iter_t)))
+            HGOTO_ERROR(H5E_DATASPACE, H5E_CANTALLOC, FAIL, "can't allocate source selection iterator")
+
+        /* Initialize destination selection iterator */
+        if (H5S_select_iter_init(dst_sel_iter, dst_space, elmt_size, sel_iter_flags) < 0)
+            HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to initialize selection iterator")
+        dst_sel_iter_init = TRUE; /* Destination selection iteration info has been initialized */
+
+        /* Initialize source selection iterator */
+        if (H5S_select_iter_init(src_sel_iter, src_space, elmt_size, H5S_SEL_ITER_SHARE_WITH_DATASPACE) < 0)
+            HGOTO_ERROR(H5E_DATASPACE, H5E_CANTINIT, FAIL, "unable to initialize selection iterator")
+        src_sel_iter_init = TRUE; /* Source selection iteration info has been initialized */
+
+        /* Initialize sequence counts */
+        curr_dst_seq = curr_src_seq = 0;
+        dst_nseq = src_nseq = 0;
+
+        /* Loop until all elements are processed */
+        while (nelmts > 0) {
+            /* Check if more destination buffer sequences are needed */
+            if (curr_dst_seq >= dst_nseq) {
+                /* Get sequences for destination selection */
+                if (H5S_SELECT_ITER_GET_SEQ_LIST(dst_sel_iter, vec_size, nelmts, &dst_nseq, &dst_nelem,
+                                                 dst_off, dst_len) < 0)
+                    HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "sequence length generation failed")
+
+                /* Start at the beginning of the sequences again */
+                curr_dst_seq = 0;
+            }
+
+            /* Check if more source buffer sequences are needed */
+            if (curr_src_seq >= src_nseq) {
+                /* Get sequences for source selection */
+                if (H5S_SELECT_ITER_GET_SEQ_LIST(src_sel_iter, vec_size, nelmts, &src_nseq, &src_nelem,
+                                                 src_off, src_len) < 0)
+                    HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "sequence length generation failed")
+
+                /* Start at the beginning of the sequences again */
+                curr_src_seq = 0;
+            } /* end if */
+
+            /* Perform vectorized memcpy from src_buf to dst_buf */
+            if ((bytes_copied = H5VM_memcpyvv(dst_buf, dst_nseq, &curr_dst_seq, dst_len, dst_off, src_buf,
+                                              src_nseq, &curr_src_seq, src_len, src_off)) < 0)
+                HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "vectorized memcpy failed")
+
+            /* Decrement number of elements left to process */
+            HDassert(((size_t)bytes_copied % elmt_size) == 0);
+            nelmts -= ((size_t)bytes_copied / elmt_size);
+        }
+    }
+
+done:
+    /* Release selection iterators */
+    if (src_sel_iter) {
+        if (src_sel_iter_init && H5S_SELECT_ITER_RELEASE(src_sel_iter) < 0)
+            HDONE_ERROR(H5E_DATASPACE, H5E_CANTRELEASE, FAIL, "unable to release selection iterator")
+
+        src_sel_iter = H5FL_FREE(H5S_sel_iter_t, src_sel_iter);
+    }
+    if (dst_sel_iter) {
+        if (dst_sel_iter_init && H5S_SELECT_ITER_RELEASE(dst_sel_iter) < 0)
+            HDONE_ERROR(H5E_DATASPACE, H5E_CANTRELEASE, FAIL, "unable to release selection iterator")
+
+        dst_sel_iter = H5FL_FREE(H5S_sel_iter_t, dst_sel_iter);
+    }
+
+    /* Release vector arrays, if allocated */
+    if (src_off)
+        src_off = H5FL_SEQ_FREE(hsize_t, src_off);
+    if (src_len)
+        src_len = H5FL_SEQ_FREE(size_t, src_len);
+    if (dst_off)
+        dst_off = H5FL_SEQ_FREE(hsize_t, dst_off);
+    if (dst_len)
+        dst_len = H5FL_SEQ_FREE(size_t, dst_len);
+
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5D_select_io_mem() */
+
+/*-------------------------------------------------------------------------
  * Function:	H5D__select_read
  *
  * Purpose:	Reads directly from file into application memory.
diff --git a/src/H5FDmpio.c b/src/H5FDmpio.c
index 1969899..4aa8a96 100644
--- a/src/H5FDmpio.c
+++ b/src/H5FDmpio.c
@@ -188,6 +188,41 @@ H5FD__mpio_parse_debug_str(const char *s)
 
     FUNC_LEAVE_NOAPI_VOID
 } /* end H5FD__mpio_parse_debug_str() */
+
+/*---------------------------------------------------------------------------
+ * Function:    H5FD__mem_t_to_str
+ *
+ * Purpose:     Returns the name of an H5FD_mem_t enum value as a string,
+ *              for use in debugging output
+ *
+ * Returns:     Name of the enum value, or "(Unknown)" if it is not recognized
+ *
+ *---------------------------------------------------------------------------
+ */
+static const char *
+H5FD__mem_t_to_str(H5FD_mem_t mem_type)
+{
+    switch (mem_type) {
+        case H5FD_MEM_NOLIST:
+            return "H5FD_MEM_NOLIST";
+        case H5FD_MEM_DEFAULT:
+            return "H5FD_MEM_DEFAULT";
+        case H5FD_MEM_SUPER:
+            return "H5FD_MEM_SUPER";
+        case H5FD_MEM_BTREE:
+            return "H5FD_MEM_BTREE";
+        case H5FD_MEM_DRAW:
+            return "H5FD_MEM_DRAW";
+        case H5FD_MEM_GHEAP:
+            return "H5FD_MEM_GHEAP";
+        case H5FD_MEM_LHEAP:
+            return "H5FD_MEM_LHEAP";
+        case H5FD_MEM_OHDR:
+            return "H5FD_MEM_OHDR";
+        default: /* Any value without an explicit case above */
+            return "(Unknown)";
+    }
+} /* end H5FD__mem_t_to_str() */
 #endif /* H5FDmpio_DEBUG */
 
 /*-------------------------------------------------------------------------
@@ -994,7 +1029,6 @@ H5FD__mpio_query(const H5FD_t H5_ATTR_UNUSED *_file, unsigned long *flags /* out
         *flags |= H5FD_FEAT_AGGREGATE_METADATA;  /* OK to aggregate metadata allocations  */
         *flags |= H5FD_FEAT_AGGREGATE_SMALLDATA; /* OK to aggregate "small" raw data allocations */
         *flags |= H5FD_FEAT_HAS_MPI; /* This driver uses MPI                                             */
-        *flags |= H5FD_FEAT_ALLOCATE_EARLY;         /* Allocate space early instead of late         */
         *flags |= H5FD_FEAT_DEFAULT_VFD_COMPATIBLE; /* VFD creates a file which can be opened with the default
                                                        VFD */
     }                                               /* end if */
@@ -1380,8 +1414,8 @@ H5FD__mpio_read(H5FD_t *_file, H5FD_mem_t H5_ATTR_UNUSED type, hid_t H5_ATTR_UNU
 
 #ifdef H5FDmpio_DEBUG
     if (H5FD_mpio_debug_r_flag)
-        HDfprintf(stderr, "%s: (%d) mpi_off = %ld  bytes_read = %lld\n", __func__, file->mpi_rank,
-                  (long)mpi_off, bytes_read);
+        HDfprintf(stderr, "%s: (%d) mpi_off = %ld  bytes_read = %lld  type = %s\n", __func__, file->mpi_rank,
+                  (long)mpi_off, bytes_read, H5FD__mem_t_to_str(type));
 #endif
 
     /*
@@ -1601,8 +1635,8 @@ H5FD__mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED dxpl_id, h
 
 #ifdef H5FDmpio_DEBUG
     if (H5FD_mpio_debug_w_flag)
-        HDfprintf(stderr, "%s: (%d) mpi_off = %ld  bytes_written = %lld\n", __func__, file->mpi_rank,
-                  (long)mpi_off, bytes_written);
+        HDfprintf(stderr, "%s: (%d) mpi_off = %ld  bytes_written = %lld  type = %s\n", __func__,
+                  file->mpi_rank, (long)mpi_off, bytes_written, H5FD__mem_t_to_str(type));
 #endif
 
     /* Each process will keep track of its perceived EOF value locally, and
diff --git a/src/H5Fmpi.c b/src/H5Fmpi.c
index 78290c6..02d8d52 100644
--- a/src/H5Fmpi.c
+++ b/src/H5Fmpi.c
@@ -524,4 +524,68 @@ H5F_set_coll_metadata_reads(H5F_t *file, H5P_coll_md_read_flag_t *file_flag, hbo
     FUNC_LEAVE_NOAPI_VOID
 } /* end H5F_set_coll_metadata_reads() */
 
+/*-------------------------------------------------------------------------
+ * Function:    H5F_mpi_get_file_block_type
+ *
+ * Purpose:     Creates an MPI derived datatype for communicating an
+ *              H5F_block_t structure. If `commit` is specified as TRUE,
+ *              the resulting datatype will be committed and ready for
+ *              use in communication. Otherwise, the type is only suitable
+ *              for building other derived types.
+ *
+ *              If TRUE is returned through `new_type_derived`, this lets
+ *              the caller know that the datatype has been derived and
+ *              should be freed with MPI_Type_free once it is no longer
+ *              needed.
+ *
+ * Return:      Non-negative on success/Negative on failure
+ *              (any type created before the failure is freed first)
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5F_mpi_get_file_block_type(hbool_t commit, MPI_Datatype *new_type, hbool_t *new_type_derived)
+{
+    MPI_Datatype types[2];
+    MPI_Aint     displacements[2];
+    int          block_lengths[2];
+    int          field_count;
+    int          mpi_code;
+    herr_t       ret_value = SUCCEED;
+
+    FUNC_ENTER_NOAPI(FAIL)
+
+    HDassert(new_type);
+    HDassert(new_type_derived);
+    /* No type has been created yet, so the caller has nothing to free */
+    *new_type_derived = FALSE;
+
+    field_count = 2;
+    HDassert(field_count == sizeof(types) / sizeof(MPI_Datatype));
+    /* One element for each of the `offset` and `length` members of H5F_block_t */
+    block_lengths[0] = 1;
+    block_lengths[1] = 1;
+    displacements[0] = offsetof(H5F_block_t, offset);
+    displacements[1] = offsetof(H5F_block_t, length);
+    types[0]         = HADDR_AS_MPI_TYPE;
+    types[1]         = HSIZE_AS_MPI_TYPE;
+    if (MPI_SUCCESS !=
+        (mpi_code = MPI_Type_create_struct(field_count, block_lengths, displacements, types, new_type)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Type_create_struct failed", mpi_code)
+    *new_type_derived = TRUE;
+
+    if (commit && MPI_SUCCESS != (mpi_code = MPI_Type_commit(new_type)))
+        HMPI_GOTO_ERROR(FAIL, "MPI_Type_commit failed", mpi_code)
+
+done:
+    if (ret_value < 0) {
+        if (*new_type_derived) {
+            if (MPI_SUCCESS != (mpi_code = MPI_Type_free(new_type)))
+                HMPI_DONE_ERROR(FAIL, "MPI_Type_free failed", mpi_code)
+            *new_type_derived = FALSE;
+        }
+    }
+
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5F_mpi_get_file_block_type() */
+
 #endif /* H5_HAVE_PARALLEL */
diff --git a/src/H5Fprivate.h b/src/H5Fprivate.h
index af65c9d..67e153e 100644
--- a/src/H5Fprivate.h
+++ b/src/H5Fprivate.h
@@ -962,7 +962,8 @@ H5_DLL MPI_Comm H5F_mpi_get_comm(const H5F_t *f);
 H5_DLL int      H5F_shared_mpi_get_size(const H5F_shared_t *f_sh);
 H5_DLL int      H5F_mpi_get_size(const H5F_t *f);
 H5_DLL herr_t   H5F_mpi_retrieve_comm(hid_t loc_id, hid_t acspl_id, MPI_Comm *mpi_comm);
-H5_DLL hbool_t  H5F_get_coll_metadata_reads(const H5F_t *f);
+H5_DLL herr_t  H5F_mpi_get_file_block_type(hbool_t commit, MPI_Datatype *new_type, hbool_t *new_type_derived);
+H5_DLL hbool_t H5F_get_coll_metadata_reads(const H5F_t *f);
 H5_DLL void H5F_set_coll_metadata_reads(H5F_t *f, H5P_coll_md_read_flag_t *file_flag, hbool_t *context_flag);
 #endif /* H5_HAVE_PARALLEL */
 
diff --git a/src/H5mpi.c b/src/H5mpi.c
index aea0104..15fb785 100644
--- a/src/H5mpi.c
+++ b/src/H5mpi.c
@@ -549,4 +549,237 @@ done:
     FUNC_LEAVE_NOAPI(ret_value)
 } /* end H5_mpio_create_large_type() */
 
+/*-------------------------------------------------------------------------
+ * Function:    H5_mpio_gatherv_alloc
+ *
+ * Purpose:     A wrapper around MPI_(All)gatherv that performs allocation
+ *              of the receive buffer on the caller's behalf. This
+ *              routine's parameters are as follows:
+ *
+ *              `send_buf` - The buffer that data will be sent from for
+ *                           the calling MPI rank. Analogous to
+ *                           MPI_(All)gatherv's `sendbuf` parameter.
+ *
+ *              `send_count` - The number of `send_type` elements in the
+ *                             send buffer. Analogous to MPI_(All)gatherv's
+ *                             `sendcount` parameter.
+ *
+ *              `send_type` - The MPI Datatype of the elements in the send
+ *                            buffer. Analogous to MPI_(All)gatherv's
+ *                            `sendtype` parameter.
+ *
+ *              `recv_counts` - An array containing the number of elements
+ *                              to be received from each MPI rank.
+ *                              Analogous to MPI_(All)gatherv's `recvcounts`
+ *                              parameter.
+ *
+ *              `displacements` - An array containing the displacements
+ *                                in the receive buffer where data from
+ *                                each MPI rank should be placed. Analogous
+ *                                to MPI_(All)gatherv's `displs` parameter.
+ *
+ *              `recv_type` - The MPI Datatype of the elements in the
+ *                            receive buffer. Analogous to
+ *                            MPI_(All)gatherv's `recvtype` parameter.
+ *
+ *              `allgather` - Specifies whether the gather operation to be
+ *                            performed should be MPI_Allgatherv (TRUE) or
+ *                            MPI_Gatherv (FALSE).
+ *
+ *              `root` - For MPI_Gatherv operations, specifies the rank
+ *                       that will receive the data sent by other ranks.
+ *                       Analogous to MPI_Gatherv's `root` parameter. For
+ *                       MPI_Allgatherv operations, this parameter is
+ *                       ignored.
+ *
+ *              `comm` - Specifies the MPI Communicator for the operation.
+ *                       Analogous to MPI_(All)gatherv's `comm` parameter.
+ *
+ *              `mpi_rank` - Specifies the calling rank's rank value, as
+ *                           obtained by calling MPI_Comm_rank on the
+ *                           MPI Communicator `comm`.
+ *
+ *              `mpi_size` - Specifies the MPI Communicator size, as
+ *                           obtained by calling MPI_Comm_size on the
+ *                           MPI Communicator `comm`.
+ *
+ *              `out_buf` - Resulting buffer that is allocated and
+ *                          returned to the caller after data has been
+ *                          gathered into it (NULL if nothing was
+ *                          gathered). Returned only to the rank specified
+ *                          by `root` for MPI_Gatherv operations, or to
+ *                          all ranks for MPI_Allgatherv operations.
+ *
+ *              `out_buf_num_entries` - The number of elements in the
+ *                                      resulting buffer, in terms of
+ *                                      the MPI Datatype provided for
+ *                                      `recv_type`.
+ *
+ * Notes:       This routine is collective across `comm`.
+ *
+ * Return:      Non-negative on success/Negative on failure
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5_mpio_gatherv_alloc(void *send_buf, int send_count, MPI_Datatype send_type, const int recv_counts[],
+                      const int displacements[], MPI_Datatype recv_type, hbool_t allgather, int root,
+                      MPI_Comm comm, int mpi_rank, int mpi_size, void **out_buf, size_t *out_buf_num_entries)
+{
+    size_t recv_buf_num_entries = 0;
+    void * recv_buf             = NULL;
+#if MPI_VERSION >= 3
+    MPI_Count type_lb;
+    MPI_Count type_extent;
+#else
+    MPI_Aint type_lb;
+    MPI_Aint type_extent;
+#endif
+    int    mpi_code;
+    herr_t ret_value = SUCCEED;
+
+    FUNC_ENTER_NOAPI(FAIL)
+
+    HDassert(send_buf || send_count == 0);
+    if (allgather || (mpi_rank == root))
+        HDassert(out_buf && out_buf_num_entries);
+
+        /* Retrieve the extent of the MPI Datatype being used */
+#if MPI_VERSION >= 3
+    if (MPI_SUCCESS != (mpi_code = MPI_Type_get_extent_x(recv_type, &type_lb, &type_extent)))
+#else
+    if (MPI_SUCCESS != (mpi_code = MPI_Type_get_extent(recv_type, &type_lb, &type_extent)))
+#endif
+        HMPI_GOTO_ERROR(FAIL, "MPI_Type_get_extent(_x) failed", mpi_code)
+
+    if (type_extent < 0)
+        HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "MPI recv_type had a negative extent")
+
+    /*
+     * Calculate the total size of the buffer being
+     * returned and allocate it
+     */
+    if (allgather || (mpi_rank == root)) {
+        size_t i;
+        size_t buf_size;
+
+        for (i = 0, recv_buf_num_entries = 0; i < (size_t)mpi_size; i++)
+            recv_buf_num_entries += (size_t)recv_counts[i];
+        buf_size = recv_buf_num_entries * (size_t)type_extent;
+
+        /*
+         * Only allocate when there is data to receive. A zero-sized gather
+         * still makes the collective call below so all ranks stay matched.
+         */
+        if ((buf_size > 0) && (NULL == (recv_buf = H5MM_malloc(buf_size))))
+            /* Push an error, but still participate in collective gather operation */
+            HDONE_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't allocate receive buffer")
+    }
+
+    /* Perform gather operation */
+    if (allgather) {
+        if (MPI_SUCCESS != (mpi_code = MPI_Allgatherv(send_buf, send_count, send_type, recv_buf, recv_counts,
+                                                      displacements, recv_type, comm)))
+            HMPI_GOTO_ERROR(FAIL, "MPI_Allgatherv failed", mpi_code)
+    }
+    else {
+        if (MPI_SUCCESS != (mpi_code = MPI_Gatherv(send_buf, send_count, send_type, recv_buf, recv_counts,
+                                                   displacements, recv_type, root, comm)))
+            HMPI_GOTO_ERROR(FAIL, "MPI_Gatherv failed", mpi_code)
+    }
+
+    if (allgather || (mpi_rank == root)) {
+        *out_buf             = recv_buf;
+        *out_buf_num_entries = recv_buf_num_entries;
+    }
+
+done:
+    if (ret_value < 0) {
+        if (recv_buf)
+            H5MM_free(recv_buf);
+    }
+
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5_mpio_gatherv_alloc() */
+
+/*-------------------------------------------------------------------------
+ * Function:    H5_mpio_gatherv_alloc_simple
+ *
+ * Purpose:     A slightly simplified interface to H5_mpio_gatherv_alloc
+ *              which calculates the receive counts and receive buffer
+ *              displacements for the caller.
+ *
+ * Notes:       This routine is collective across `comm`.
+ *              See H5_mpio_gatherv_alloc for the remaining parameters.
+ * Return:      Non-negative on success/Negative on failure
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5_mpio_gatherv_alloc_simple(void *send_buf, int send_count, MPI_Datatype send_type, MPI_Datatype recv_type,
+                             hbool_t allgather, int root, MPI_Comm comm, int mpi_rank, int mpi_size,
+                             void **out_buf, size_t *out_buf_num_entries)
+{
+    int *  recv_counts_disps_array = NULL;
+    int    mpi_code;
+    herr_t ret_value = SUCCEED;
+
+    FUNC_ENTER_NOAPI(FAIL)
+
+    HDassert(send_buf || send_count == 0);
+    if (allgather || (mpi_rank == root))
+        HDassert(out_buf && out_buf_num_entries);
+
+    /*
+     * Allocate array to store the receive counts of each rank, as well as
+     * the displacements into the final array where each rank will place
+     * their data. The first half of the array contains the receive counts
+     * (in rank order), while the latter half contains the displacements
+     * (also in rank order).
+     */
+    if (allgather || (mpi_rank == root)) {
+        if (NULL ==
+            (recv_counts_disps_array = H5MM_malloc(2 * (size_t)mpi_size * sizeof(*recv_counts_disps_array))))
+            /* Push an error, but still participate in collective gather operation */
+            HDONE_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
+                        "couldn't allocate receive counts and displacements array")
+    }
+    /* NOTE(review): recv_counts_disps_array is NULL below if the allocation above failed - confirm */
+    /* Collect every rank's send count on the rank(s) that will receive the data */
+    if (allgather) {
+        if (MPI_SUCCESS !=
+            (mpi_code = MPI_Allgather(&send_count, 1, MPI_INT, recv_counts_disps_array, 1, MPI_INT, comm)))
+            HMPI_GOTO_ERROR(FAIL, "MPI_Allgather failed", mpi_code)
+    }
+    else {
+        if (MPI_SUCCESS !=
+            (mpi_code = MPI_Gather(&send_count, 1, MPI_INT, recv_counts_disps_array, 1, MPI_INT, root, comm)))
+            HMPI_GOTO_ERROR(FAIL, "MPI_Gather failed", mpi_code)
+    }
+
+    /* Set the displacements into the receive buffer for the gather operation */
+    if (allgather || (mpi_rank == root)) {
+        size_t i;
+        int *  displacements_ptr;
+        /* The displacements occupy the second half of the array */
+        displacements_ptr = &recv_counts_disps_array[mpi_size];
+        /* Running sum: each rank's data starts where the previous rank's data ends */
+        *displacements_ptr = 0;
+        for (i = 1; i < (size_t)mpi_size; i++)
+            displacements_ptr[i] = displacements_ptr[i - 1] + recv_counts_disps_array[i - 1];
+    }
+
+    /* Perform gather operation */
+    if (H5_mpio_gatherv_alloc(send_buf, send_count, send_type, recv_counts_disps_array,
+                              &recv_counts_disps_array[mpi_size], recv_type, allgather, root, comm, mpi_rank,
+                              mpi_size, out_buf, out_buf_num_entries) < 0)
+        HGOTO_ERROR(H5E_LIB, H5E_CANTGATHER, FAIL, "can't gather data")
+
+done:
+    if (recv_counts_disps_array)
+        H5MM_free(recv_counts_disps_array);
+
+    FUNC_LEAVE_NOAPI(ret_value)
+} /* end H5_mpio_gatherv_alloc_simple() */
+
 #endif /* H5_HAVE_PARALLEL */
diff --git a/src/H5private.h b/src/H5private.h
index 68aabc2..d67163f 100644
--- a/src/H5private.h
+++ b/src/H5private.h
@@ -387,6 +387,25 @@
 #define HSSIZET_MAX ((hssize_t)LLONG_MAX)
 #define HSSIZET_MIN (~(HSSIZET_MAX))
 
+#ifdef H5_HAVE_PARALLEL
+/* Define a type for safely sending size_t values with MPI: the unsigned
+ * MPI integer type whose maximum value is the same as SIZE_MAX */
+#if SIZE_MAX == UCHAR_MAX
+#define H5_SIZE_T_AS_MPI_TYPE MPI_UNSIGNED_CHAR
+#elif SIZE_MAX == USHRT_MAX
+#define H5_SIZE_T_AS_MPI_TYPE MPI_UNSIGNED_SHORT
+#elif SIZE_MAX == UINT_MAX
+#define H5_SIZE_T_AS_MPI_TYPE MPI_UNSIGNED
+#elif SIZE_MAX == ULONG_MAX
+#define H5_SIZE_T_AS_MPI_TYPE MPI_UNSIGNED_LONG
+#elif SIZE_MAX == ULLONG_MAX
+#define H5_SIZE_T_AS_MPI_TYPE MPI_UNSIGNED_LONG_LONG
+#else
+#error "no suitable MPI type for size_t"
+#endif
+
+#endif /* H5_HAVE_PARALLEL */
+
 /*
  * Types and max sizes for POSIX I/O.
  * OS X (Darwin) is odd since the max I/O size does not match the types.
@@ -508,6 +527,9 @@
 #define H5_GCC_CLANG_DIAG_ON(x)
 #endif
 
+/* Function pointer typedef for the comparison callback passed to qsort */
+typedef int (*H5_sort_func_cb_t)(const void *, const void *);
+
 /* Typedefs and functions for timing certain parts of the library. */
 
 /* A set of elapsed/user/system times emitted as a time point by the
@@ -2617,6 +2639,14 @@ H5_DLL herr_t  H5_mpi_comm_cmp(MPI_Comm comm1, MPI_Comm comm2, int *result);
 H5_DLL herr_t  H5_mpi_info_cmp(MPI_Info info1, MPI_Info info2, int *result);
 H5_DLL herr_t  H5_mpio_create_large_type(hsize_t num_elements, MPI_Aint stride_bytes, MPI_Datatype old_type,
                                          MPI_Datatype *new_type);
+H5_DLL herr_t  H5_mpio_gatherv_alloc(void *send_buf, int send_count, MPI_Datatype send_type,
+                                     const int recv_counts[], const int displacements[],
+                                     MPI_Datatype recv_type, hbool_t allgather, int root, MPI_Comm comm,
+                                     int mpi_rank, int mpi_size, void **out_buf, size_t *out_buf_num_entries);
+H5_DLL herr_t  H5_mpio_gatherv_alloc_simple(void *send_buf, int send_count, MPI_Datatype send_type,
+                                            MPI_Datatype recv_type, hbool_t allgather, int root, MPI_Comm comm,
+                                            int mpi_rank, int mpi_size, void **out_buf,
+                                            size_t *out_buf_num_entries);
 #endif /* H5_HAVE_PARALLEL */
 
 /* Functions for debugging */
diff --git a/src/H5public.h b/src/H5public.h
index 6a3911c..037501b 100644
--- a/src/H5public.h
+++ b/src/H5public.h
@@ -289,6 +289,11 @@ typedef long long ssize_t;
  * \internal Defined as a (minimum) 64-bit integer type.
  */
 typedef uint64_t hsize_t;
+
+#ifdef H5_HAVE_PARALLEL
+#define HSIZE_AS_MPI_TYPE MPI_UINT64_T
+#endif
+
 /**
  * The size of file objects. Used when negative values are needed to indicate errors.
  *
@@ -323,7 +328,7 @@ typedef uint64_t haddr_t;
 #define HADDR_MAX           (HADDR_UNDEF - 1)
 
 #ifdef H5_HAVE_PARALLEL
-#define HADDR_AS_MPI_TYPE MPI_LONG_LONG_INT
+#define HADDR_AS_MPI_TYPE MPI_UINT64_T
 #endif
 
 //! <!-- [H5_iter_order_t_snip] -->
diff --git a/testpar/t_2Gio.c b/testpar/t_2Gio.c
index 2be4ae4..911be2c 100644
--- a/testpar/t_2Gio.c
+++ b/testpar/t_2Gio.c
@@ -3047,7 +3047,7 @@ compress_readAll(void)
                     nerrors++;
                 }
 
-#if MPI_VERSION >= 3
+#ifdef H5_HAVE_PARALLEL_FILTERED_WRITES
             ret = H5Dwrite(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, xfer_plist, data_read);
             VRFY((ret >= 0), "H5Dwrite succeeded");
 #endif
@@ -3853,12 +3853,6 @@ actual_io_mode_tests(void)
  *       TEST_NOT_CONTIGUOUS_OR_CHUNKED_DATASET_EXTERNAL:
  *         Test for Externl-File storage as the cause of breaking collective I/O.
  *
- *       TEST_FILTERS:
- *         Test for using filter (checksum) as the cause of breaking collective I/O.
- *         Note: TEST_FILTERS mode will not work until H5Dcreate and H5write is supported for mpio and filter
- * feature. Use test_no_collective_cause_mode_filter() function instead.
- *
- *
  * Programmer: Jonathan Kim
  * Date: Aug, 2012
  */
@@ -3898,9 +3892,6 @@ test_no_collective_cause_mode(int selection_mode)
     hid_t       file_space = -1;
     hsize_t     chunk_dims[MAX_RANK];
     herr_t      ret;
-#ifdef LATER /* fletcher32 */
-    H5Z_filter_t filter_info;
-#endif /* LATER */
     /* set to global value as default */
     int  l_facc_type = facc_type;
     char message[256];
@@ -3932,21 +3923,6 @@ test_no_collective_cause_mode(int selection_mode)
         is_chunked = 0;
     }
 
-#ifdef LATER /* fletcher32 */
-    if (selection_mode & TEST_FILTERS) {
-        ret = H5Zfilter_avail(H5Z_FILTER_FLETCHER32);
-        VRFY((ret >= 0), "Fletcher32 filter is available.\n");
-
-        ret = H5Zget_filter_info(H5Z_FILTER_FLETCHER32, &filter_info);
-        VRFY(((filter_info & H5Z_FILTER_CONFIG_ENCODE_ENABLED) ||
-              (filter_info & H5Z_FILTER_CONFIG_DECODE_ENABLED)),
-             "Fletcher32 filter encoding and decoding available.\n");
-
-        ret = H5Pset_fletcher32(dcpl);
-        VRFY((ret >= 0), "set filter (flecher32) succeeded");
-    }
-#endif /* LATER */
-
     if (selection_mode & TEST_NOT_SIMPLE_OR_SCALAR_DATASPACES) {
         sid = H5Screate(H5S_NULL);
         VRFY((sid >= 0), "H5Screate_simple succeeded");
@@ -4022,14 +3998,6 @@ test_no_collective_cause_mode(int selection_mode)
         no_collective_cause_global_expected |= H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET;
     }
 
-#ifdef LATER /* fletcher32 */
-    if (selection_mode & TEST_FILTERS) {
-        test_name = "Broken Collective I/O - Filter is required";
-        no_collective_cause_local_expected |= H5D_MPIO_FILTERS;
-        no_collective_cause_global_expected |= H5D_MPIO_FILTERS;
-    }
-#endif /* LATER */
-
     if (selection_mode & TEST_COLLECTIVE) {
         test_name                           = "Broken Collective I/O - Not Broken";
         no_collective_cause_local_expected  = H5D_MPIO_COLLECTIVE;
@@ -4166,240 +4134,6 @@ test_no_collective_cause_mode(int selection_mode)
     return;
 }
 
-#if 0
-/*
- * Function: test_no_collective_cause_mode_filter
- *
- * Purpose:
- *    Test specific for using filter as a caus of broken collective I/O and
- *    checks that the H5Pget_mpio_no_collective_cause properties in the DXPL
- *    have the correct values.
- *
- * NOTE:
- *    This is a temporary function.
- *    test_no_collective_cause_mode(TEST_FILTERS) will replace this when
- *    H5Dcreate and H5write support for mpio and filter feature.
- *
- * Input:
- *     TEST_FILTERS_READ:
- *       Test for using filter (checksum) as the cause of breaking collective I/O.
- *
- * Programmer: Jonathan Kim
- * Date: Aug, 2012
- */
-static void
-test_no_collective_cause_mode_filter(int selection_mode)
-{
-    uint32_t no_collective_cause_local_read = 0;
-    uint32_t no_collective_cause_local_expected = 0;
-    uint32_t no_collective_cause_global_read = 0;
-    uint32_t no_collective_cause_global_expected = 0;
-
-    const char  * filename;
-    const char  * test_name;
-    hbool_t     is_chunked=1;
-    int         mpi_size = -1;
-    int         mpi_rank = -1;
-    int         length;
-    int         * buffer;
-    int         i;
-    MPI_Comm    mpi_comm = MPI_COMM_NULL;
-    MPI_Info    mpi_info = MPI_INFO_NULL;
-    hid_t       fid = -1;
-    hid_t       sid = -1;
-    hid_t       dataset = -1;
-    hid_t       data_type = H5T_NATIVE_INT;
-    hid_t       fapl_write = -1;
-    hid_t       fapl_read = -1;
-    hid_t       dcpl = -1;
-    hid_t       dxpl = -1;
-    hsize_t     dims[MAX_RANK];
-    hid_t       mem_space = -1;
-    hid_t       file_space = -1;
-    hsize_t     chunk_dims[MAX_RANK];
-    herr_t      ret;
-#ifdef LATER /* fletcher32 */
-    H5Z_filter_t filter_info;
-#endif       /* LATER */
-    char message[256];
-
-    /* Set up MPI parameters */
-    MPI_Comm_size(test_comm, &mpi_size);
-    MPI_Comm_rank(test_comm, &mpi_rank);
-
-    MPI_Barrier(test_comm);
-
-    HDassert(mpi_size >= 1);
-
-    mpi_comm = test_comm;
-    mpi_info = MPI_INFO_NULL;
-
-    /* Create the dataset creation plist */
-    dcpl = H5Pcreate(H5P_DATASET_CREATE);
-    VRFY((dcpl >= 0), "dataset creation plist created successfully");
-
-    if (selection_mode == TEST_FILTERS_READ )  {
-#ifdef LATER /* fletcher32 */
-            ret = H5Zfilter_avail(H5Z_FILTER_FLETCHER32);
-            VRFY ((ret >=0 ), "Fletcher32 filter is available.\n");
-
-            ret = H5Zget_filter_info (H5Z_FILTER_FLETCHER32, (unsigned int *) &filter_info);
-            VRFY ( ( (filter_info & H5Z_FILTER_CONFIG_ENCODE_ENABLED) || (filter_info & H5Z_FILTER_CONFIG_DECODE_ENABLED) ) , "Fletcher32 filter encoding and decoding available.\n");
-
-            ret = H5Pset_fletcher32(dcpl);
-            VRFY((ret >= 0),"set filter (flecher32) succeeded");
-#endif       /* LATER */
-    }
-    else  {
-        VRFY(0, "Unexpected mode, only test for TEST_FILTERS_READ.");
-    }
-
-    /* Create the basic Space */
-    dims[0] = dim0;
-    dims[1] = dim1;
-    sid = H5Screate_simple (MAX_RANK, dims, NULL);
-    VRFY((sid >= 0), "H5Screate_simple succeeded");
-
-
-    filename = (const char *)GetTestParameters();
-    HDassert(filename != NULL);
-
-    /* Setup the file access template */
-    fapl_write = create_faccess_plist(mpi_comm, mpi_info, FACC_DEFAULT);
-    VRFY((fapl_write >= 0), "create_faccess_plist() succeeded");
-
-    fid = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_write);
-    VRFY((fid >= 0), "H5Fcreate succeeded");
-
-    /* If we are not testing contiguous datasets */
-    if(is_chunked) {
-        /* Set up chunk information.  */
-        chunk_dims[0] = dims[0]/mpi_size;
-        chunk_dims[1] = dims[1];
-        ret = H5Pset_chunk(dcpl, 2, chunk_dims);
-        VRFY((ret >= 0),"chunk creation property list succeeded");
-    }
-
-
-    /* Create the dataset */
-    dataset = H5Dcreate2(fid, DSET_NOCOLCAUSE, data_type, sid, H5P_DEFAULT, dcpl, H5P_DEFAULT);
-    VRFY((dataset >= 0), "H5Dcreate2() dataset succeeded");
-
-#ifdef LATER /* fletcher32 */
-    /* Set expected cause */
-    test_name = "Broken Collective I/O - Filter is required";
-    no_collective_cause_local_expected = H5D_MPIO_FILTERS;
-    no_collective_cause_global_expected = H5D_MPIO_FILTERS;
-#endif       /* LATER */
-
-    /* Get the file dataspace */
-    file_space = H5Dget_space(dataset);
-    VRFY((file_space >= 0), "H5Dget_space succeeded");
-
-    /* Create the memory dataspace */
-    mem_space = H5Screate_simple (MAX_RANK, dims, NULL);
-    VRFY((mem_space >= 0), "mem_space created");
-
-    /* Get the number of elements in the selection */
-    length = dim0 * dim1;
-
-    /* Allocate and initialize the buffer */
-    buffer = (int *)HDmalloc(sizeof(int) * length);
-    VRFY((buffer != NULL), "HDmalloc of buffer succeeded");
-    for(i = 0; i < length; i++)
-        buffer[i] = i;
-
-    /* Set up the dxpl for the write */
-    dxpl = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((dxpl >= 0), "H5Pcreate(H5P_DATASET_XFER) succeeded");
-
-    if (selection_mode == TEST_FILTERS_READ)  {
-        /* To test read in collective I/O mode , write in independent mode
-         * because write fails with mpio + filter */
-        ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_INDEPENDENT);
-        VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
-    }
-    else  {
-        /* To test write in collective I/O mode. */
-        ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE);
-        VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
-    }
-
-
-    /* Write */
-    ret = H5Dwrite(dataset, data_type, mem_space, file_space, dxpl, buffer);
-
-    if(ret < 0) H5Eprint2(H5E_DEFAULT, stdout);
-    VRFY((ret >= 0), "H5Dwrite() dataset multichunk write succeeded");
-
-
-    /* Make a copy of the dxpl to test the read operation */
-    dxpl = H5Pcopy(dxpl);
-    VRFY((dxpl >= 0), "H5Pcopy succeeded");
-
-    if (dataset)
-        H5Dclose(dataset);
-    if (fapl_write)
-        H5Pclose(fapl_write);
-    if (fid)
-        H5Fclose(fid);
-
-
-    /*---------------------
-     * Test Read access
-     *---------------------*/
-
-    /* Setup the file access template */
-    fapl_read = create_faccess_plist(mpi_comm, mpi_info, facc_type);
-    VRFY((fapl_read >= 0), "create_faccess_plist() succeeded");
-
-    fid = H5Fopen (filename, H5F_ACC_RDONLY, fapl_read);
-    dataset = H5Dopen2 (fid, DSET_NOCOLCAUSE, H5P_DEFAULT);
-
-    /* Set collective I/O properties in the dxpl. */
-    ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE);
-    VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
-
-    /* Read */
-    ret = H5Dread(dataset, data_type, mem_space, file_space, dxpl, buffer);
-
-    if(ret < 0) H5Eprint2(H5E_DEFAULT, stdout);
-    VRFY((ret >= 0), "H5Dread() dataset multichunk read succeeded");
-
-    /* Get the cause of broken collective I/O */
-    ret = H5Pget_mpio_no_collective_cause (dxpl, &no_collective_cause_local_read, &no_collective_cause_global_read);
-    VRFY((ret >= 0), "retrieving no collective cause succeeded" );
-
-    /* Test values */
-    HDmemset (message, 0, sizeof (message));
-    HDsprintf(message, "Local cause of Broken Collective I/O has the correct value for %s.\n",test_name);
-    VRFY((no_collective_cause_local_read == (uint32_t)no_collective_cause_local_expected), message);
-    HDmemset (message, 0, sizeof (message));
-    HDsprintf(message, "Global cause of Broken Collective I/O has the correct value for %s.\n",test_name);
-    VRFY((no_collective_cause_global_read == (uint32_t)no_collective_cause_global_expected), message);
-
-    /* Release some resources */
-    if (sid)
-        H5Sclose(sid);
-    if (fapl_read)
-        H5Pclose(fapl_read);
-    if (dcpl)
-        H5Pclose(dcpl);
-    if (dxpl)
-        H5Pclose(dxpl);
-    if (dataset)
-        H5Dclose(dataset);
-    if (mem_space)
-        H5Sclose(mem_space);
-    if (file_space)
-        H5Sclose(file_space);
-    if (fid)
-        H5Fclose(fid);
-    HDfree(buffer);
-    return;
-}
-#endif
-
 /* Function: no_collective_cause_tests
  *
  * Purpose: Tests cases for broken collective IO.
@@ -4420,13 +4154,6 @@ no_collective_cause_tests(void)
     test_no_collective_cause_mode(TEST_NOT_SIMPLE_OR_SCALAR_DATASPACES);
     test_no_collective_cause_mode(TEST_NOT_CONTIGUOUS_OR_CHUNKED_DATASET_COMPACT);
     test_no_collective_cause_mode(TEST_NOT_CONTIGUOUS_OR_CHUNKED_DATASET_EXTERNAL);
-#ifdef LATER /* fletcher32 */
-    /* TODO: use this instead of below TEST_FILTERS_READ when H5Dcreate and
-     * H5Dwrite is ready for mpio + filter feature.
-     */
-    /* test_no_collective_cause_mode (TEST_FILTERS); */
-    test_no_collective_cause_mode_filter(TEST_FILTERS_READ);
-#endif /* LATER */
 
     /*
      * Test combined causes
diff --git a/testpar/t_dset.c b/testpar/t_dset.c
index 2aade32..8616bef 100644
--- a/testpar/t_dset.c
+++ b/testpar/t_dset.c
@@ -2605,7 +2605,7 @@ compress_readAll(void)
                     nerrors++;
                 }
 
-#if MPI_VERSION >= 3
+#ifdef H5_HAVE_PARALLEL_FILTERED_WRITES
             ret = H5Dwrite(dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, xfer_plist, data_read);
             VRFY((ret >= 0), "H5Dwrite succeeded");
 #endif
@@ -3418,12 +3418,6 @@ actual_io_mode_tests(void)
  *       TEST_NOT_CONTIGUOUS_OR_CHUNKED_DATASET_EXTERNAL:
  *         Test for Externl-File storage as the cause of breaking collective I/O.
  *
- *       TEST_FILTERS:
- *         Test for using filter (checksum) as the cause of breaking collective I/O.
- *         Note: TEST_FILTERS mode will not work until H5Dcreate and H5write is supported for mpio and filter
- * feature. Use test_no_collective_cause_mode_filter() function instead.
- *
- *
  * Programmer: Jonathan Kim
  * Date: Aug, 2012
  */
@@ -3465,9 +3459,6 @@ test_no_collective_cause_mode(int selection_mode)
     hid_t       file_space = -1;
     hsize_t     chunk_dims[RANK];
     herr_t      ret;
-#ifdef LATER /* fletcher32 */
-    H5Z_filter_t filter_info;
-#endif /* LATER */
     /* set to global value as default */
     int  l_facc_type = facc_type;
     char message[256];
@@ -3499,21 +3490,6 @@ test_no_collective_cause_mode(int selection_mode)
         is_chunked = 0;
     }
 
-#ifdef LATER /* fletcher32 */
-    if (selection_mode & TEST_FILTERS) {
-        ret = H5Zfilter_avail(H5Z_FILTER_FLETCHER32);
-        VRFY((ret >= 0), "Fletcher32 filter is available.\n");
-
-        ret = H5Zget_filter_info(H5Z_FILTER_FLETCHER32, &filter_info);
-        VRFY(((filter_info & H5Z_FILTER_CONFIG_ENCODE_ENABLED) ||
-              (filter_info & H5Z_FILTER_CONFIG_DECODE_ENABLED)),
-             "Fletcher32 filter encoding and decoding available.\n");
-
-        ret = H5Pset_fletcher32(dcpl);
-        VRFY((ret >= 0), "set filter (flecher32) succeeded");
-    }
-#endif /* LATER */
-
     if (selection_mode & TEST_NOT_SIMPLE_OR_SCALAR_DATASPACES) {
         sid = H5Screate(H5S_NULL);
         VRFY((sid >= 0), "H5Screate_simple succeeded");
@@ -3589,14 +3565,6 @@ test_no_collective_cause_mode(int selection_mode)
         no_collective_cause_global_expected |= H5D_MPIO_NOT_CONTIGUOUS_OR_CHUNKED_DATASET;
     }
 
-#ifdef LATER /* fletcher32 */
-    if (selection_mode & TEST_FILTERS) {
-        test_name = "Broken Collective I/O - Filter is required";
-        no_collective_cause_local_expected |= H5D_MPIO_FILTERS;
-        no_collective_cause_global_expected |= H5D_MPIO_FILTERS;
-    }
-#endif /* LATER */
-
     if (selection_mode & TEST_COLLECTIVE) {
         test_name                           = "Broken Collective I/O - Not Broken";
         no_collective_cause_local_expected  = H5D_MPIO_COLLECTIVE;
@@ -3735,242 +3703,6 @@ test_no_collective_cause_mode(int selection_mode)
     return;
 }
 
-/*
- * Function: test_no_collective_cause_mode_filter
- *
- * Purpose:
- *    Test specific for using filter as a caus of broken collective I/O and
- *    checks that the H5Pget_mpio_no_collective_cause properties in the DXPL
- *    have the correct values.
- *
- * NOTE:
- *    This is a temporary function.
- *    test_no_collective_cause_mode(TEST_FILTERS) will replace this when
- *    H5Dcreate and H5write support for mpio and filter feature.
- *
- * Input:
- *     TEST_FILTERS_READ:
- *       Test for using filter (checksum) as the cause of breaking collective I/O.
- *
- * Programmer: Jonathan Kim
- * Date: Aug, 2012
- */
-#ifdef LATER
-static void
-test_no_collective_cause_mode_filter(int selection_mode)
-{
-    uint32_t no_collective_cause_local_read      = 0;
-    uint32_t no_collective_cause_local_expected  = 0;
-    uint32_t no_collective_cause_global_read     = 0;
-    uint32_t no_collective_cause_global_expected = 0;
-
-    const char *filename;
-    const char *test_name  = "I/O";
-    hbool_t     is_chunked = 1;
-    int         mpi_size   = -1;
-    int         mpi_rank   = -1;
-    int         length;
-    int *       buffer;
-    int         i;
-    MPI_Comm    mpi_comm   = MPI_COMM_NULL;
-    MPI_Info    mpi_info   = MPI_INFO_NULL;
-    hid_t       fid        = -1;
-    hid_t       sid        = -1;
-    hid_t       dataset    = -1;
-    hid_t       data_type  = H5T_NATIVE_INT;
-    hid_t       fapl_write = -1;
-    hid_t       fapl_read  = -1;
-    hid_t       dcpl       = -1;
-    hid_t       dxpl       = -1;
-    hsize_t     dims[RANK];
-    hid_t       mem_space  = -1;
-    hid_t       file_space = -1;
-    hsize_t     chunk_dims[RANK];
-    herr_t      ret;
-#ifdef LATER /* fletcher32 */
-    H5Z_filter_t filter_info;
-#endif /* LATER */
-    char message[256];
-
-    /* Set up MPI parameters */
-    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
-    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
-
-    MPI_Barrier(MPI_COMM_WORLD);
-
-    HDassert(mpi_size >= 1);
-
-    mpi_comm = MPI_COMM_WORLD;
-    mpi_info = MPI_INFO_NULL;
-
-    /* Create the dataset creation plist */
-    dcpl = H5Pcreate(H5P_DATASET_CREATE);
-    VRFY((dcpl >= 0), "dataset creation plist created successfully");
-
-    if (selection_mode == TEST_FILTERS_READ) {
-#ifdef LATER /* fletcher32 */
-        ret = H5Zfilter_avail(H5Z_FILTER_FLETCHER32);
-        VRFY((ret >= 0), "Fletcher32 filter is available.\n");
-
-        ret = H5Zget_filter_info(H5Z_FILTER_FLETCHER32, (unsigned int *)&filter_info);
-        VRFY(((filter_info & H5Z_FILTER_CONFIG_ENCODE_ENABLED) ||
-              (filter_info & H5Z_FILTER_CONFIG_DECODE_ENABLED)),
-             "Fletcher32 filter encoding and decoding available.\n");
-
-        ret = H5Pset_fletcher32(dcpl);
-        VRFY((ret >= 0), "set filter (flecher32) succeeded");
-#endif /* LATER */
-    }
-    else {
-        VRFY(0, "Unexpected mode, only test for TEST_FILTERS_READ.");
-    }
-
-    /* Create the basic Space */
-    dims[0] = (hsize_t)dim0;
-    dims[1] = (hsize_t)dim1;
-    sid     = H5Screate_simple(RANK, dims, NULL);
-    VRFY((sid >= 0), "H5Screate_simple succeeded");
-
-    filename = (const char *)GetTestParameters();
-    HDassert(filename != NULL);
-
-    /* Setup the file access template */
-    fapl_write = create_faccess_plist(mpi_comm, mpi_info, FACC_DEFAULT);
-    VRFY((fapl_write >= 0), "create_faccess_plist() succeeded");
-
-    fid = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_write);
-    VRFY((fid >= 0), "H5Fcreate succeeded");
-
-    /* If we are not testing contiguous datasets */
-    if (is_chunked) {
-        /* Set up chunk information.  */
-        chunk_dims[0] = dims[0] / (hsize_t)mpi_size;
-        chunk_dims[1] = dims[1];
-        ret           = H5Pset_chunk(dcpl, 2, chunk_dims);
-        VRFY((ret >= 0), "chunk creation property list succeeded");
-    }
-
-    /* Create the dataset */
-    dataset = H5Dcreate2(fid, DSET_NOCOLCAUSE, data_type, sid, H5P_DEFAULT, dcpl, H5P_DEFAULT);
-    VRFY((dataset >= 0), "H5Dcreate2() dataset succeeded");
-
-#ifdef LATER /* fletcher32 */
-    /* Set expected cause */
-    test_name                           = "Broken Collective I/O - Filter is required";
-    no_collective_cause_local_expected  = H5D_MPIO_FILTERS;
-    no_collective_cause_global_expected = H5D_MPIO_FILTERS;
-#endif /* LATER */
-
-    /* Get the file dataspace */
-    file_space = H5Dget_space(dataset);
-    VRFY((file_space >= 0), "H5Dget_space succeeded");
-
-    /* Create the memory dataspace */
-    mem_space = H5Screate_simple(RANK, dims, NULL);
-    VRFY((mem_space >= 0), "mem_space created");
-
-    /* Get the number of elements in the selection */
-    length = dim0 * dim1;
-
-    /* Allocate and initialize the buffer */
-    buffer = (int *)HDmalloc(sizeof(int) * length);
-    VRFY((buffer != NULL), "HDmalloc of buffer succeeded");
-    for (i = 0; i < length; i++)
-        buffer[i] = i;
-
-    /* Set up the dxpl for the write */
-    dxpl = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((dxpl >= 0), "H5Pcreate(H5P_DATASET_XFER) succeeded");
-
-    if (selection_mode == TEST_FILTERS_READ) {
-        /* To test read in collective I/O mode , write in independent mode
-         * because write fails with mpio + filter */
-        ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_INDEPENDENT);
-        VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
-    }
-    else {
-        /* To test write in collective I/O mode. */
-        ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE);
-        VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
-    }
-
-    /* Write */
-    ret = H5Dwrite(dataset, data_type, mem_space, file_space, dxpl, buffer);
-
-    if (ret < 0)
-        H5Eprint2(H5E_DEFAULT, stdout);
-    VRFY((ret >= 0), "H5Dwrite() dataset multichunk write succeeded");
-
-    /* Make a copy of the dxpl to test the read operation */
-    dxpl = H5Pcopy(dxpl);
-    VRFY((dxpl >= 0), "H5Pcopy succeeded");
-
-    if (dataset)
-        H5Dclose(dataset);
-    if (fapl_write)
-        H5Pclose(fapl_write);
-    if (fid)
-        H5Fclose(fid);
-
-    /*---------------------
-     * Test Read access
-     *---------------------*/
-
-    /* Setup the file access template */
-    fapl_read = create_faccess_plist(mpi_comm, mpi_info, facc_type);
-    VRFY((fapl_read >= 0), "create_faccess_plist() succeeded");
-
-    fid     = H5Fopen(filename, H5F_ACC_RDONLY, fapl_read);
-    dataset = H5Dopen2(fid, DSET_NOCOLCAUSE, H5P_DEFAULT);
-
-    /* Set collective I/O properties in the dxpl. */
-    ret = H5Pset_dxpl_mpio(dxpl, H5FD_MPIO_COLLECTIVE);
-    VRFY((ret >= 0), "H5Pset_dxpl_mpio succeeded");
-
-    /* Read */
-    ret = H5Dread(dataset, data_type, mem_space, file_space, dxpl, buffer);
-
-    if (ret < 0)
-        H5Eprint2(H5E_DEFAULT, stdout);
-    VRFY((ret >= 0), "H5Dread() dataset multichunk read succeeded");
-
-    /* Get the cause of broken collective I/O */
-    ret = H5Pget_mpio_no_collective_cause(dxpl, &no_collective_cause_local_read,
-                                          &no_collective_cause_global_read);
-    VRFY((ret >= 0), "retrieving no collective cause succeeded");
-
-    /* Test values */
-    HDmemset(message, 0, sizeof(message));
-    HDsnprintf(message, sizeof(message),
-               "Local cause of Broken Collective I/O has the correct value for %s.\n", test_name);
-    VRFY((no_collective_cause_local_read == (uint32_t)no_collective_cause_local_expected), message);
-    HDmemset(message, 0, sizeof(message));
-    HDsnprintf(message, sizeof(message),
-               "Global cause of Broken Collective I/O has the correct value for %s.\n", test_name);
-    VRFY((no_collective_cause_global_read == (uint32_t)no_collective_cause_global_expected), message);
-
-    /* Release some resources */
-    if (sid)
-        H5Sclose(sid);
-    if (fapl_read)
-        H5Pclose(fapl_read);
-    if (dcpl)
-        H5Pclose(dcpl);
-    if (dxpl)
-        H5Pclose(dxpl);
-    if (dataset)
-        H5Dclose(dataset);
-    if (mem_space)
-        H5Sclose(mem_space);
-    if (file_space)
-        H5Sclose(file_space);
-    if (fid)
-        H5Fclose(fid);
-    HDfree(buffer);
-    return;
-}
-#endif
-
 /* Function: no_collective_cause_tests
  *
  * Purpose: Tests cases for broken collective IO.
@@ -3991,13 +3723,6 @@ no_collective_cause_tests(void)
     test_no_collective_cause_mode(TEST_NOT_SIMPLE_OR_SCALAR_DATASPACES);
     test_no_collective_cause_mode(TEST_NOT_CONTIGUOUS_OR_CHUNKED_DATASET_COMPACT);
     test_no_collective_cause_mode(TEST_NOT_CONTIGUOUS_OR_CHUNKED_DATASET_EXTERNAL);
-#ifdef LATER /* fletcher32 */
-    /* TODO: use this instead of below TEST_FILTERS_READ when H5Dcreate and
-     * H5Dwrite is ready for mpio + filter feature.
-     */
-    /* test_no_collective_cause_mode (TEST_FILTERS); */
-    test_no_collective_cause_mode_filter(TEST_FILTERS_READ);
-#endif /* LATER */
 
     /*
      * Test combined causes
diff --git a/testpar/t_filters_parallel.c b/testpar/t_filters_parallel.c
index 78af0fb..8a55519 100644
--- a/testpar/t_filters_parallel.c
+++ b/testpar/t_filters_parallel.c
@@ -26,73 +26,139 @@
 const char *FILENAME[] = {"t_filters_parallel", NULL};
 char        filenames[1][256];
 
+static MPI_Comm comm = MPI_COMM_WORLD;
+static MPI_Info info = MPI_INFO_NULL;
+static int      mpi_rank;
+static int      mpi_size;
+
 int nerrors = 0;
 
-size_t cur_filter_idx = 0;
-#define GZIP_INDEX       0
-#define FLETCHER32_INDEX 1
+/* Arrays of filter ID values and filter names; entry i of each must describe the same filter */
+H5Z_filter_t filterIDs[] = {
+    H5Z_FILTER_DEFLATE, H5Z_FILTER_SHUFFLE, H5Z_FILTER_FLETCHER32,
+    H5Z_FILTER_SZIP,    H5Z_FILTER_NBIT,    H5Z_FILTER_SCALEOFFSET,
+};
+
+const char *filterNames[] = {"Deflate", "Shuffle", "Fletcher32", "SZIP", "Nbit", "ScaleOffset"};
+
+/* Function pointer typedef for test functions */
+typedef void (*test_func)(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, hid_t dcpl_id,
+                          hid_t dxpl_id);
 
-#define ARRAY_SIZE(a) sizeof(a) / sizeof(a[0])
+/* Typedef for filter arguments for user-defined filters */
+typedef struct filter_options_t {
+    unsigned int       flags;
+    size_t             cd_nelmts;
+    const unsigned int cd_values[];
+} filter_options_t;
 
 /*
- * Used to check if a filter is available before running a test.
+ * Enum for verify_space_alloc_status which specifies
+ * how many chunks have been written to in a dataset
  */
-#define CHECK_CUR_FILTER_AVAIL()                                                                             \
-    {                                                                                                        \
-        htri_t filter_is_avail;                                                                              \
-                                                                                                             \
-        if (cur_filter_idx == GZIP_INDEX) {                                                                  \
-            if ((filter_is_avail = H5Zfilter_avail(H5Z_FILTER_DEFLATE)) != TRUE) {                           \
-                if (MAINPROCESS) {                                                                           \
-                    HDputs("    - SKIPPED - Deflate filter not available");                                  \
-                }                                                                                            \
-                return;                                                                                      \
-            }                                                                                                \
-        }                                                                                                    \
-    }
+typedef enum num_chunks_written_t {
+    DATASET_JUST_CREATED,
+    NO_CHUNKS_WRITTEN,
+    SOME_CHUNKS_WRITTEN,
+    ALL_CHUNKS_WRITTEN
+} num_chunks_written_t;
 
-static herr_t set_dcpl_filter(hid_t dcpl);
+static herr_t set_dcpl_filter(hid_t dcpl_id, H5Z_filter_t filter_id, filter_options_t *filter_options);
+static herr_t verify_space_alloc_status(hid_t dset_id, hid_t dcpl_id, num_chunks_written_t chunks_written);
 
-#if MPI_VERSION >= 3
+#ifdef H5_HAVE_PARALLEL_FILTERED_WRITES
 /* Tests for writing data in parallel */
-static void test_write_one_chunk_filtered_dataset(void);
-static void test_write_filtered_dataset_no_overlap(void);
-static void test_write_filtered_dataset_overlap(void);
-static void test_write_filtered_dataset_single_no_selection(void);
-static void test_write_filtered_dataset_all_no_selection(void);
-static void test_write_filtered_dataset_point_selection(void);
-static void test_write_filtered_dataset_interleaved_write(void);
-static void test_write_transformed_filtered_dataset_no_overlap(void);
-static void test_write_3d_filtered_dataset_no_overlap_separate_pages(void);
-static void test_write_3d_filtered_dataset_no_overlap_same_pages(void);
-static void test_write_3d_filtered_dataset_overlap(void);
-static void test_write_cmpd_filtered_dataset_no_conversion_unshared(void);
-static void test_write_cmpd_filtered_dataset_no_conversion_shared(void);
-static void test_write_cmpd_filtered_dataset_type_conversion_unshared(void);
-static void test_write_cmpd_filtered_dataset_type_conversion_shared(void);
+static void test_write_one_chunk_filtered_dataset(const char *parent_group, H5Z_filter_t filter_id,
+                                                  hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id);
+static void test_write_filtered_dataset_no_overlap(const char *parent_group, H5Z_filter_t filter_id,
+                                                   hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id);
+static void test_write_filtered_dataset_no_overlap_partial(const char *parent_group, H5Z_filter_t filter_id,
+                                                           hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id);
+static void test_write_filtered_dataset_overlap(const char *parent_group, H5Z_filter_t filter_id,
+                                                hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id);
+static void test_write_filtered_dataset_single_unlim_dim_no_overlap(const char * parent_group,
+                                                                    H5Z_filter_t filter_id, hid_t fapl_id,
+                                                                    hid_t dcpl_id, hid_t dxpl_id);
+static void test_write_filtered_dataset_single_unlim_dim_overlap(const char * parent_group,
+                                                                 H5Z_filter_t filter_id, hid_t fapl_id,
+                                                                 hid_t dcpl_id, hid_t dxpl_id);
+static void test_write_filtered_dataset_multi_unlim_dim_no_overlap(const char * parent_group,
+                                                                   H5Z_filter_t filter_id, hid_t fapl_id,
+                                                                   hid_t dcpl_id, hid_t dxpl_id);
+static void test_write_filtered_dataset_multi_unlim_dim_overlap(const char * parent_group,
+                                                                H5Z_filter_t filter_id, hid_t fapl_id,
+                                                                hid_t dcpl_id, hid_t dxpl_id);
+static void test_write_filtered_dataset_single_no_selection(const char *parent_group, H5Z_filter_t filter_id,
+                                                            hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id);
+static void test_write_filtered_dataset_all_no_selection(const char *parent_group, H5Z_filter_t filter_id,
+                                                         hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id);
+static void test_write_filtered_dataset_point_selection(const char *parent_group, H5Z_filter_t filter_id,
+                                                        hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id);
+static void test_write_filtered_dataset_interleaved_write(const char *parent_group, H5Z_filter_t filter_id,
+                                                          hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id);
+static void test_write_transformed_filtered_dataset_no_overlap(const char * parent_group,
+                                                               H5Z_filter_t filter_id, hid_t fapl_id,
+                                                               hid_t dcpl_id, hid_t dxpl_id);
+static void test_write_3d_filtered_dataset_no_overlap_separate_pages(const char * parent_group,
+                                                                     H5Z_filter_t filter_id, hid_t fapl_id,
+                                                                     hid_t dcpl_id, hid_t dxpl_id);
+static void test_write_3d_filtered_dataset_no_overlap_same_pages(const char * parent_group,
+                                                                 H5Z_filter_t filter_id, hid_t fapl_id,
+                                                                 hid_t dcpl_id, hid_t dxpl_id);
+static void test_write_3d_filtered_dataset_overlap(const char *parent_group, H5Z_filter_t filter_id,
+                                                   hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id);
+static void test_write_cmpd_filtered_dataset_no_conversion_unshared(const char * parent_group,
+                                                                    H5Z_filter_t filter_id, hid_t fapl_id,
+                                                                    hid_t dcpl_id, hid_t dxpl_id);
+static void test_write_cmpd_filtered_dataset_no_conversion_shared(const char * parent_group,
+                                                                  H5Z_filter_t filter_id, hid_t fapl_id,
+                                                                  hid_t dcpl_id, hid_t dxpl_id);
+static void test_write_cmpd_filtered_dataset_type_conversion_unshared(const char * parent_group,
+                                                                      H5Z_filter_t filter_id, hid_t fapl_id,
+                                                                      hid_t dcpl_id, hid_t dxpl_id);
+static void test_write_cmpd_filtered_dataset_type_conversion_shared(const char * parent_group,
+                                                                    H5Z_filter_t filter_id, hid_t fapl_id,
+                                                                    hid_t dcpl_id, hid_t dxpl_id);
 #endif
 
 /* Tests for reading data in parallel */
-static void test_read_one_chunk_filtered_dataset(void);
-static void test_read_filtered_dataset_no_overlap(void);
-static void test_read_filtered_dataset_overlap(void);
-static void test_read_filtered_dataset_single_no_selection(void);
-static void test_read_filtered_dataset_all_no_selection(void);
-static void test_read_filtered_dataset_point_selection(void);
-static void test_read_filtered_dataset_interleaved_read(void);
-static void test_read_transformed_filtered_dataset_no_overlap(void);
-static void test_read_3d_filtered_dataset_no_overlap_separate_pages(void);
-static void test_read_3d_filtered_dataset_no_overlap_same_pages(void);
-static void test_read_3d_filtered_dataset_overlap(void);
-static void test_read_cmpd_filtered_dataset_no_conversion_unshared(void);
-static void test_read_cmpd_filtered_dataset_no_conversion_shared(void);
-static void test_read_cmpd_filtered_dataset_type_conversion_unshared(void);
-static void test_read_cmpd_filtered_dataset_type_conversion_shared(void);
-
-#if MPI_VERSION >= 3
-/* Other miscellaneous tests */
-static void test_shrinking_growing_chunks(void);
-#endif
+static void test_read_one_chunk_filtered_dataset(const char *parent_group, H5Z_filter_t filter_id,
+                                                 hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id);
+static void test_read_filtered_dataset_no_overlap(const char *parent_group, H5Z_filter_t filter_id,
+                                                  hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id);
+static void test_read_filtered_dataset_overlap(const char *parent_group, H5Z_filter_t filter_id,
+                                               hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id);
+static void test_read_filtered_dataset_single_no_selection(const char *parent_group, H5Z_filter_t filter_id,
+                                                           hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id);
+static void test_read_filtered_dataset_all_no_selection(const char *parent_group, H5Z_filter_t filter_id,
+                                                        hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id);
+static void test_read_filtered_dataset_point_selection(const char *parent_group, H5Z_filter_t filter_id,
+                                                       hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id);
+static void test_read_filtered_dataset_interleaved_read(const char *parent_group, H5Z_filter_t filter_id,
+                                                        hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id);
+static void test_read_transformed_filtered_dataset_no_overlap(const char * parent_group,
+                                                              H5Z_filter_t filter_id, hid_t fapl_id,
+                                                              hid_t dcpl_id, hid_t dxpl_id);
+static void test_read_3d_filtered_dataset_no_overlap_separate_pages(const char * parent_group,
+                                                                    H5Z_filter_t filter_id, hid_t fapl_id,
+                                                                    hid_t dcpl_id, hid_t dxpl_id);
+static void test_read_3d_filtered_dataset_no_overlap_same_pages(const char * parent_group,
+                                                                H5Z_filter_t filter_id, hid_t fapl_id,
+                                                                hid_t dcpl_id, hid_t dxpl_id);
+static void test_read_3d_filtered_dataset_overlap(const char *parent_group, H5Z_filter_t filter_id,
+                                                  hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id);
+static void test_read_cmpd_filtered_dataset_no_conversion_unshared(const char * parent_group,
+                                                                   H5Z_filter_t filter_id, hid_t fapl_id,
+                                                                   hid_t dcpl_id, hid_t dxpl_id);
+static void test_read_cmpd_filtered_dataset_no_conversion_shared(const char * parent_group,
+                                                                 H5Z_filter_t filter_id, hid_t fapl_id,
+                                                                 hid_t dcpl_id, hid_t dxpl_id);
+static void test_read_cmpd_filtered_dataset_type_conversion_unshared(const char * parent_group,
+                                                                     H5Z_filter_t filter_id, hid_t fapl_id,
+                                                                     hid_t dcpl_id, hid_t dxpl_id);
+static void test_read_cmpd_filtered_dataset_type_conversion_shared(const char * parent_group,
+                                                                   H5Z_filter_t filter_id, hid_t fapl_id,
+                                                                   hid_t dcpl_id, hid_t dxpl_id);
 
 /*
  * Tests for attempting to round-trip the data going from
@@ -103,21 +169,40 @@ static void test_shrinking_growing_chunks(void);
  *
  * written in parallel -> read serially
  */
-static void test_write_serial_read_parallel(void);
-#if MPI_VERSION >= 3
-static void test_write_parallel_read_serial(void);
-#endif
+static void test_write_serial_read_parallel(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                            hid_t dcpl_id, hid_t dxpl_id);
 
-static MPI_Comm comm = MPI_COMM_WORLD;
-static MPI_Info info = MPI_INFO_NULL;
-static int      mpi_rank;
-static int      mpi_size;
+#ifdef H5_HAVE_PARALLEL_FILTERED_WRITES
+static void test_write_parallel_read_serial(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                            hid_t dcpl_id, hid_t dxpl_id);
 
-static void (*tests[])(void) = {
-#if MPI_VERSION >= 3
+/* Other miscellaneous tests */
+static void test_shrinking_growing_chunks(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                          hid_t dcpl_id, hid_t dxpl_id);
+static void test_edge_chunks_no_overlap(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                        hid_t dcpl_id, hid_t dxpl_id);
+static void test_edge_chunks_overlap(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                     hid_t dcpl_id, hid_t dxpl_id);
+static void test_edge_chunks_partial_write(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                           hid_t dcpl_id, hid_t dxpl_id);
+static void test_fill_values(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, hid_t dcpl_id,
+                             hid_t dxpl_id);
+static void test_fill_value_undefined(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                      hid_t dcpl_id, hid_t dxpl_id);
+static void test_fill_time_never(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                 hid_t dcpl_id, hid_t dxpl_id);
+#endif
+
+static test_func tests[] = {
+#ifdef H5_HAVE_PARALLEL_FILTERED_WRITES
     test_write_one_chunk_filtered_dataset,
     test_write_filtered_dataset_no_overlap,
+    test_write_filtered_dataset_no_overlap_partial,
     test_write_filtered_dataset_overlap,
+    test_write_filtered_dataset_single_unlim_dim_no_overlap,
+    test_write_filtered_dataset_single_unlim_dim_overlap,
+    test_write_filtered_dataset_multi_unlim_dim_no_overlap,
+    test_write_filtered_dataset_multi_unlim_dim_overlap,
     test_write_filtered_dataset_single_no_selection,
     test_write_filtered_dataset_all_no_selection,
     test_write_filtered_dataset_point_selection,
@@ -147,33 +232,168 @@ static void (*tests[])(void) = {
     test_read_cmpd_filtered_dataset_type_conversion_unshared,
     test_read_cmpd_filtered_dataset_type_conversion_shared,
     test_write_serial_read_parallel,
-#if MPI_VERSION >= 3
+#ifdef H5_HAVE_PARALLEL_FILTERED_WRITES
     test_write_parallel_read_serial,
     test_shrinking_growing_chunks,
+    test_edge_chunks_no_overlap,
+    test_edge_chunks_overlap,
+    test_edge_chunks_partial_write,
+    test_fill_values,
+    test_fill_value_undefined,
+    test_fill_time_never,
 #endif
 };
 
 /*
  * Function to call the appropriate HDF5 filter-setting function
- * depending on the currently set index. Used to re-run the tests
+ * depending on the given filter ID. Used to re-run the tests
  * with different filters to check that the data still comes back
  * correctly under a variety of circumstances, such as the
  * Fletcher32 checksum filter increasing the size of the chunk.
  */
 static herr_t
-set_dcpl_filter(hid_t dcpl)
+set_dcpl_filter(hid_t dcpl_id, H5Z_filter_t filter_id, filter_options_t *filter_options)
+{
+    switch (filter_id) {
+        case H5Z_FILTER_DEFLATE:
+            return H5Pset_deflate(dcpl_id, DEFAULT_DEFLATE_LEVEL);
+        case H5Z_FILTER_SHUFFLE:
+            return H5Pset_shuffle(dcpl_id);
+        case H5Z_FILTER_FLETCHER32:
+            return H5Pset_fletcher32(dcpl_id);
+        case H5Z_FILTER_SZIP: {
+            /* Default to the largest block size; only reduced below for small chunks */
+            unsigned pixels_per_block         = H5_SZIP_MAX_PIXELS_PER_BLOCK;
+            hsize_t  chunk_dims[H5S_MAX_RANK] = {0};
+            size_t   i, chunk_nelemts;
+
+            VRFY(H5Pget_chunk(dcpl_id, H5S_MAX_RANK, chunk_dims) >= 0, "H5Pget_chunk succeeded");
+
+            /* Entries left at 0 by the initializer are skipped so they don't zero the product */
+            for (i = 0, chunk_nelemts = 1; i < H5S_MAX_RANK; i++)
+                if (chunk_dims[i] > 0)
+                    chunk_nelemts *= chunk_dims[i];
+
+            if (chunk_nelemts < H5_SZIP_MAX_PIXELS_PER_BLOCK) {
+                /*
+                 * Can't set SZIP for chunk of 1 data element, so skip
+                 * the filter and report success. Pixels-per-block
+                 * value must be both even and non-zero.
+                 */
+                if (chunk_nelemts == 1)
+                    return SUCCEED;
+
+                if ((chunk_nelemts % 2) == 0)
+                    pixels_per_block = (unsigned)chunk_nelemts;
+                else
+                    pixels_per_block = (unsigned)(chunk_nelemts - 1);
+            }
+
+            return H5Pset_szip(dcpl_id, 0, pixels_per_block);
+        }
+        case H5Z_FILTER_NBIT:
+            return H5Pset_nbit(dcpl_id);
+        case H5Z_FILTER_SCALEOFFSET:
+            return H5Pset_scaleoffset(dcpl_id, H5Z_SO_INT, 0);
+        default: {
+            if (!filter_options)
+                return FAIL;
+
+            return H5Pset_filter(dcpl_id, filter_id, filter_options->flags, filter_options->cd_nelmts,
+                                 filter_options->cd_values);
+        }
+    }
+}
+
+/*
+ * Function to verify the status of dataset storage space allocation
+ * based on the dataset's allocation time setting and how many chunks
+ * in the dataset have been written to.
+ */
+static herr_t
+verify_space_alloc_status(hid_t dset_id, hid_t dcpl_id, num_chunks_written_t chunks_written)
 {
-    switch (cur_filter_idx) {
-        case GZIP_INDEX:
-            return H5Pset_deflate(dcpl, DEFAULT_DEFLATE_LEVEL);
-        case FLETCHER32_INDEX:
-            return H5Pset_fletcher32(dcpl);
-        default:
-            return H5Pset_deflate(dcpl, DEFAULT_DEFLATE_LEVEL);
+    int    nfilters;
+    herr_t ret_value = SUCCEED;
+
+    VRFY(((nfilters = H5Pget_nfilters(dcpl_id)) >= 0), "H5Pget_nfilters succeeded");
+
+    /*
+     * Only verify space allocation status when there are filters
+     * in the dataset's filter pipeline. When filters aren't in the
+     * pipeline, the space allocation time and status can vary based
+     * on whether the file was created in parallel or serial mode.
+     */
+    if (nfilters > 0) {
+        H5D_space_status_t space_status;
+        H5D_alloc_time_t   alloc_time;
+
+        VRFY((H5Pget_alloc_time(dcpl_id, &alloc_time) >= 0), "H5Pget_alloc_time succeeded");
+        VRFY((H5Dget_space_status(dset_id, &space_status) >= 0), "H5Dget_space_status succeeded");
+
+        switch (alloc_time) {
+            case H5D_ALLOC_TIME_EARLY:
+                /*
+                 * Early space allocation should always result in the
+                 * full dataset storage space being allocated.
+                 */
+                VRFY(space_status == H5D_SPACE_STATUS_ALLOCATED, "verified space allocation status");
+                break;
+            case H5D_ALLOC_TIME_LATE:
+                /*
+                 * Late space allocation should always result in the
+                 * full dataset storage space being allocated when
+                 * the dataset gets written to. However, if the dataset
+                 * is extended the dataset's space allocation status
+                 * can become partly allocated until the dataset is
+                 * written to again.
+                 */
+                if (chunks_written == SOME_CHUNKS_WRITTEN || chunks_written == ALL_CHUNKS_WRITTEN)
+                    VRFY((space_status == H5D_SPACE_STATUS_ALLOCATED) ||
+                             (space_status == H5D_SPACE_STATUS_PART_ALLOCATED),
+                         "verified space allocation status");
+                else if (chunks_written == NO_CHUNKS_WRITTEN)
+                    /*
+                     * A special case where we wrote to a dataset that
+                     * uses late space allocation, but the write was
+                     * either a no-op (no selection in the dataset
+                     * from any rank) or something caused the write to
+                     * fail late in the process of performing the actual
+                     * write. In either case, space should still have
+                     * been allocated.
+                     */
+                    VRFY(space_status == H5D_SPACE_STATUS_ALLOCATED, "verified space allocation status");
+                else
+                    VRFY(space_status == H5D_SPACE_STATUS_NOT_ALLOCATED, "verified space allocation status");
+                break;
+            case H5D_ALLOC_TIME_DEFAULT:
+            case H5D_ALLOC_TIME_INCR:
+                /*
+                 * Incremental space allocation should result in
+                 * the dataset's storage space being incrementally
+                 * allocated as chunks are written to. Once all chunks
+                 * have been written to, the space allocation should be
+                 * seen as fully allocated.
+                 */
+                if (chunks_written == SOME_CHUNKS_WRITTEN)
+                    VRFY((space_status == H5D_SPACE_STATUS_PART_ALLOCATED),
+                         "verified space allocation status");
+                else if (chunks_written == ALL_CHUNKS_WRITTEN)
+                    VRFY((space_status == H5D_SPACE_STATUS_ALLOCATED), "verified space allocation status");
+                else
+                    VRFY(space_status == H5D_SPACE_STATUS_NOT_ALLOCATED, "verified space allocation status");
+                break;
+            default:
+                if (MAINPROCESS)
+                    MESG("unknown space allocation time");
+                MPI_Abort(MPI_COMM_WORLD, 1);
+        }
     }
+
+    return ret_value;
 }
 
-#if MPI_VERSION >= 3
+#ifdef H5_HAVE_PARALLEL_FILTERED_WRITES
 /*
  * Tests parallel write of filtered data in the special
  * case where a dataset is composed of a single chunk.
@@ -182,7 +402,8 @@ set_dcpl_filter(hid_t dcpl)
  *             02/01/2017
  */
 static void
-test_write_one_chunk_filtered_dataset(void)
+test_write_one_chunk_filtered_dataset(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                      hid_t dcpl_id, hid_t dxpl_id)
 {
     C_DATATYPE *data        = NULL;
     C_DATATYPE *read_buf    = NULL;
@@ -195,26 +416,18 @@ test_write_one_chunk_filtered_dataset(void)
     hsize_t     count[WRITE_ONE_CHUNK_FILTERED_DATASET_DIMS];
     hsize_t     block[WRITE_ONE_CHUNK_FILTERED_DATASET_DIMS];
     size_t      i, data_size, correct_buf_size;
-    hid_t       file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t       filespace = -1, memspace = -1;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
         HDputs("Testing write to one-chunk filtered dataset");
 
-    CHECK_CUR_FILTER_AVAIL();
-
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
-
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
     /* Create the dataspace for the dataset */
     dataset_dims[0] = (hsize_t)WRITE_ONE_CHUNK_FILTERED_DATASET_NROWS;
@@ -231,19 +444,21 @@ test_write_one_chunk_filtered_dataset(void)
     VRFY((memspace >= 0), "Memory dataspace creation succeeded");
 
     /* Create chunked dataset */
-    plist_id = H5Pcreate(H5P_DATASET_CREATE);
-    VRFY((plist_id >= 0), "DCPL creation succeeded");
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
     VRFY((H5Pset_chunk(plist_id, WRITE_ONE_CHUNK_FILTERED_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set");
 
     /* Add test filter to the pipeline */
-    VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-    dset_id = H5Dcreate2(file_id, WRITE_ONE_CHUNK_FILTERED_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
+    dset_id = H5Dcreate2(group_id, WRITE_ONE_CHUNK_FILTERED_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
                          H5P_DEFAULT, plist_id, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
     /* Each process defines the dataset selection in memory and writes
@@ -293,15 +508,12 @@ test_write_one_chunk_filtered_dataset(void)
                          ((C_DATATYPE)i / (WRITE_ONE_CHUNK_FILTERED_DATASET_CH_NROWS / mpi_size *
                                            WRITE_ONE_CHUNK_FILTERED_DATASET_CH_NCOLS));
 
-    /* Create property list for collective dataset write */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
-
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
-
-    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0),
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0),
          "Dataset write succeeded");
 
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
     if (data)
         HDfree(data);
 
@@ -311,10 +523,10 @@ test_write_one_chunk_filtered_dataset(void)
     read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" WRITE_ONE_CHUNK_FILTERED_DATASET_NAME, H5P_DEFAULT);
+    dset_id = H5Dopen2(group_id, WRITE_ONE_CHUNK_FILTERED_DATASET_NAME, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0),
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
          "Dataset read succeeded");
 
     VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
@@ -324,10 +536,11 @@ test_write_one_chunk_filtered_dataset(void)
     if (read_buf)
         HDfree(read_buf);
 
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
@@ -343,7 +556,8 @@ test_write_one_chunk_filtered_dataset(void)
  *             02/01/2017
  */
 static void
-test_write_filtered_dataset_no_overlap(void)
+test_write_filtered_dataset_no_overlap(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                       hid_t dcpl_id, hid_t dxpl_id)
 {
     C_DATATYPE *data        = NULL;
     C_DATATYPE *read_buf    = NULL;
@@ -356,27 +570,18 @@ test_write_filtered_dataset_no_overlap(void)
     hsize_t     count[WRITE_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS];
     hsize_t     block[WRITE_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS];
     size_t      i, data_size, correct_buf_size;
-    hid_t       file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t       filespace = -1, memspace = -1;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
         HDputs("Testing write to unshared filtered chunks");
 
-    CHECK_CUR_FILTER_AVAIL();
-
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
-
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
     /* Create the dataspace for the dataset */
     dataset_dims[0] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_NROWS;
@@ -393,20 +598,22 @@ test_write_filtered_dataset_no_overlap(void)
     VRFY((memspace >= 0), "Memory dataspace creation succeeded");
 
     /* Create chunked dataset */
-    plist_id = H5Pcreate(H5P_DATASET_CREATE);
-    VRFY((plist_id >= 0), "DCPL creation succeeded");
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
     VRFY((H5Pset_chunk(plist_id, WRITE_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
          "Chunk size set");
 
     /* Add test filter to the pipeline */
-    VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-    dset_id = H5Dcreate2(file_id, WRITE_UNSHARED_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
+    dset_id = H5Dcreate2(group_id, WRITE_UNSHARED_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
                          H5P_DEFAULT, plist_id, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
     /* Each process defines the dataset selection in memory and writes
@@ -454,15 +661,12 @@ test_write_filtered_dataset_no_overlap(void)
         correct_buf[i] = (C_DATATYPE)((i % (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])) +
                                       (i / (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])));
 
-    /* Create property list for collective dataset write */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
-
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
-
-    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0),
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0),
          "Dataset write succeeded");
 
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
     if (data)
         HDfree(data);
 
@@ -472,10 +676,10 @@ test_write_filtered_dataset_no_overlap(void)
     read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" WRITE_UNSHARED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
+    dset_id = H5Dopen2(group_id, WRITE_UNSHARED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0),
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
          "Dataset read succeeded");
 
     VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
@@ -485,102 +689,91 @@ test_write_filtered_dataset_no_overlap(void)
     if (read_buf)
         HDfree(read_buf);
 
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
 
 /*
- * Tests parallel write of filtered data in the case where
- * more than one process is writing to a particular chunk
- * in the operation. In this case, the chunks have to be
- * redistributed before the operation so that only one process
- * writes to a particular chunk.
- *
- * Programmer: Jordan Henderson
- *             02/01/2017
+ * Tests parallel write of filtered data in the case where each
+ * chunk is written to by exactly one process, and that process
+ * writes to only part of the chunk.
  */
 static void
-test_write_filtered_dataset_overlap(void)
+test_write_filtered_dataset_no_overlap_partial(const char *parent_group, H5Z_filter_t filter_id,
+                                               hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id)
 {
     C_DATATYPE *data        = NULL;
     C_DATATYPE *read_buf    = NULL;
     C_DATATYPE *correct_buf = NULL;
-    hsize_t     dataset_dims[WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     chunk_dims[WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     sel_dims[WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     start[WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     stride[WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     count[WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     block[WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     dataset_dims[WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_DIMS];
+    hsize_t     chunk_dims[WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_DIMS];
+    hsize_t     sel_dims[WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_DIMS];
+    hsize_t     start[WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_DIMS];
+    hsize_t     stride[WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_DIMS];
+    hsize_t     count[WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_DIMS];
+    hsize_t     block[WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_DIMS];
     size_t      i, data_size, correct_buf_size;
-    hid_t       file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t       filespace = -1, memspace = -1;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
-        HDputs("Testing write to shared filtered chunks");
-
-    CHECK_CUR_FILTER_AVAIL();
-
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
+        HDputs("Testing partial write to unshared filtered chunks");
 
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
-
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
     /* Create the dataspace for the dataset */
-    dataset_dims[0] = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_NROWS;
-    dataset_dims[1] = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_NCOLS;
-    chunk_dims[0]   = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_CH_NROWS;
-    chunk_dims[1]   = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_CH_NCOLS;
-    sel_dims[0]     = (hsize_t)DIM0_SCALE_FACTOR;
-    sel_dims[1]     = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_CH_NCOLS * (hsize_t)DIM1_SCALE_FACTOR;
-
-    filespace = H5Screate_simple(WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
+    dataset_dims[0] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_NROWS;
+    dataset_dims[1] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_NCOLS;
+    chunk_dims[0]   = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NROWS;
+    chunk_dims[1]   = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NCOLS;
+    sel_dims[0]     = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NROWS;
+    sel_dims[1]     = (hsize_t)(WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_NCOLS /
+                            WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NCOLS);
+
+    filespace = H5Screate_simple(WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_DIMS, dataset_dims, NULL);
     VRFY((filespace >= 0), "File dataspace creation succeeded");
 
-    memspace = H5Screate_simple(WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS, sel_dims, NULL);
-    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
-
     /* Create chunked dataset */
-    plist_id = H5Pcreate(H5P_DATASET_CREATE);
-    VRFY((plist_id >= 0), "DCPL creation succeeded");
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-    VRFY((H5Pset_chunk(plist_id, WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
+    VRFY((H5Pset_chunk(plist_id, WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_DIMS, chunk_dims) >= 0),
          "Chunk size set");
 
     /* Add test filter to the pipeline */
-    VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-    dset_id = H5Dcreate2(file_id, WRITE_SHARED_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
-                         H5P_DEFAULT, plist_id, H5P_DEFAULT);
+    dset_id = H5Dcreate2(group_id, WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_NAME, HDF5_DATATYPE_NAME,
+                         filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
     /* Each process defines the dataset selection in memory and writes
      * it to the hyperslab in the file
      */
-    count[0]  = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_NROWS / (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_CH_NROWS;
-    count[1]  = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_NCOLS / (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_CH_NCOLS;
-    stride[0] = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_CH_NROWS;
-    stride[1] = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_CH_NCOLS;
-    block[0]  = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_CH_NROWS / (hsize_t)mpi_size;
-    block[1]  = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_CH_NCOLS;
-    start[0]  = (hsize_t)mpi_rank * block[0];
+    count[0]  = 1;
+    count[1]  = (hsize_t)(WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_NCOLS /
+                         WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NCOLS);
+    stride[0] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NROWS;
+    stride[1] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NCOLS;
+    block[0]  = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NROWS;
+    block[1]  = (hsize_t)1;
+    start[0]  = ((hsize_t)mpi_rank * (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NROWS * count[0]);
     start[1]  = 0;
 
     if (VERBOSE_MED) {
@@ -611,33 +804,38 @@ test_write_filtered_dataset_overlap(void)
     for (i = 0; i < data_size / sizeof(*data); i++)
         data[i] = (C_DATATYPE)GEN_DATA(i);
 
-    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
-        correct_buf[i] = (C_DATATYPE)(
-            (dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
-            (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
-
-    /* Create property list for collective dataset write */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
+    for (i = 0; i < (size_t)mpi_size; i++) {
+        size_t rank_n_elems = (size_t)(mpi_size * (WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NROWS *
+                                                   WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NCOLS));
+        size_t data_idx     = i;
 
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
+        for (size_t j = 0; j < rank_n_elems; j++) {
+            if ((j % WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NCOLS) == 0) {
+                correct_buf[(i * rank_n_elems) + j] = (C_DATATYPE)data_idx;
+                data_idx++;
+            }
+        }
+    }
 
-    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0),
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0),
          "Dataset write succeeded");
 
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
     if (data)
         HDfree(data);
 
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
-    /* Verify correct data was written */
+    /* Verify the correct data was written */
     read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" WRITE_SHARED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
+    dset_id = H5Dopen2(group_id, WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_NAME, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0),
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
          "Dataset read succeeded");
 
     VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
@@ -647,10 +845,10 @@ test_write_filtered_dataset_overlap(void)
     if (read_buf)
         HDfree(read_buf);
 
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
-    VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
@@ -658,98 +856,85 @@ test_write_filtered_dataset_overlap(void)
 
 /*
  * Tests parallel write of filtered data in the case where
- * a single process in the write operation has no selection
- * in the dataset's dataspace. In this case, the process with
- * no selection still has to participate in the collective
- * space re-allocation for the filtered chunks and also must
- * participate in the re-insertion of the filtered chunks
- * into the chunk index.
+ * more than one process is writing to a particular chunk
+ * in the operation. In this case, the chunks have to be
+ * redistributed before the operation so that only one process
+ * writes to a particular chunk.
  *
  * Programmer: Jordan Henderson
  *             02/01/2017
  */
 static void
-test_write_filtered_dataset_single_no_selection(void)
+test_write_filtered_dataset_overlap(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                    hid_t dcpl_id, hid_t dxpl_id)
 {
     C_DATATYPE *data        = NULL;
     C_DATATYPE *read_buf    = NULL;
     C_DATATYPE *correct_buf = NULL;
-    hsize_t     dataset_dims[WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     chunk_dims[WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     sel_dims[WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     start[WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     stride[WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     count[WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     block[WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     dataset_dims[WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     chunk_dims[WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     sel_dims[WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     start[WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     stride[WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     count[WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     block[WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
     size_t      i, data_size, correct_buf_size;
-    size_t      segment_length;
-    hid_t       file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t       filespace = -1, memspace = -1;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
-        HDputs("Testing write to filtered chunks with a single process having no selection");
-
-    CHECK_CUR_FILTER_AVAIL();
-
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
+        HDputs("Testing write to shared filtered chunks");
 
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
     /* Create the dataspace for the dataset */
-    dataset_dims[0] = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NROWS;
-    dataset_dims[1] = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NCOLS;
-    chunk_dims[0]   = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS;
-    chunk_dims[1]   = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
-    sel_dims[0]     = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS;
-    sel_dims[1]     = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NCOLS;
-
-    if (mpi_rank == WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NO_SELECT_PROC)
-        sel_dims[0] = sel_dims[1] = 0;
+    dataset_dims[0] = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_NROWS;
+    dataset_dims[1] = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_NCOLS;
+    chunk_dims[0]   = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_CH_NROWS;
+    chunk_dims[1]   = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_CH_NCOLS;
+    sel_dims[0]     = (hsize_t)DIM0_SCALE_FACTOR;
+    sel_dims[1]     = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_CH_NCOLS * (hsize_t)DIM1_SCALE_FACTOR;
 
-    filespace = H5Screate_simple(WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
+    filespace = H5Screate_simple(WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
     VRFY((filespace >= 0), "File dataspace creation succeeded");
 
-    memspace = H5Screate_simple(WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, sel_dims, NULL);
+    memspace = H5Screate_simple(WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS, sel_dims, NULL);
     VRFY((memspace >= 0), "Memory dataspace creation succeeded");
 
     /* Create chunked dataset */
-    plist_id = H5Pcreate(H5P_DATASET_CREATE);
-    VRFY((plist_id >= 0), "DCPL creation succeeded");
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-    VRFY((H5Pset_chunk(plist_id, WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
+    VRFY((H5Pset_chunk(plist_id, WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
          "Chunk size set");
 
     /* Add test filter to the pipeline */
-    VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-    dset_id = H5Dcreate2(file_id, WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME,
-                         filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+    dset_id = H5Dcreate2(group_id, WRITE_SHARED_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
+                         H5P_DEFAULT, plist_id, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
     /* Each process defines the dataset selection in memory and writes
      * it to the hyperslab in the file
      */
-    count[0] = 1;
-    count[1] = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NCOLS /
-               (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
-    stride[0] = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS;
-    stride[1] = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
-    block[0]  = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS;
-    block[1]  = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
-    start[0]  = (hsize_t)mpi_rank * (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS * count[0];
+    count[0]  = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_NROWS / (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_CH_NROWS;
+    count[1]  = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_NCOLS / (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_CH_NCOLS;
+    stride[0] = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_CH_NROWS;
+    stride[1] = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_CH_NCOLS;
+    block[0]  = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_CH_NROWS / (hsize_t)mpi_size;
+    block[1]  = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_CH_NCOLS;
+    start[0]  = (hsize_t)mpi_rank * block[0];
     start[1]  = 0;
 
     if (VERBOSE_MED) {
@@ -764,11 +949,8 @@ test_write_filtered_dataset_single_no_selection(void)
     filespace = H5Dget_space(dset_id);
     VRFY((filespace >= 0), "File dataspace retrieval succeeded");
 
-    if (mpi_rank == WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NO_SELECT_PROC)
-        VRFY((H5Sselect_none(filespace) >= 0), "Select none succeeded");
-    else
-        VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
-             "Hyperslab selection succeeded");
+    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+         "Hyperslab selection succeeded");
 
     /* Fill data buffer */
     data_size        = sel_dims[0] * sel_dims[1] * sizeof(*data);
@@ -784,37 +966,29 @@ test_write_filtered_dataset_single_no_selection(void)
         data[i] = (C_DATATYPE)GEN_DATA(i);
 
     for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
-        correct_buf[i] = (C_DATATYPE)((i % (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])) +
-                                      (i / (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])));
-
-    /* Compute the correct offset into the buffer for the process having no selection and clear it */
-    segment_length = dataset_dims[0] * dataset_dims[1] / (hsize_t)mpi_size;
-    HDmemset(correct_buf +
-                 ((size_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NO_SELECT_PROC * segment_length),
-             0, segment_length * sizeof(*data));
-
-    /* Create property list for collective dataset write */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
-
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
+        correct_buf[i] = (C_DATATYPE)(
+            (dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
+            (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
 
-    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0),
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0),
          "Dataset write succeeded");
 
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
     if (data)
         HDfree(data);
 
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
-    /* Verify the correct data was written */
+    /* Verify correct data was written */
     read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
+    dset_id = H5Dopen2(group_id, WRITE_SHARED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0),
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
          "Dataset read succeeded");
 
     VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
@@ -824,294 +998,318 @@ test_write_filtered_dataset_single_no_selection(void)
     if (read_buf)
         HDfree(read_buf);
 
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
 
 /*
- * Tests parallel write of filtered data in the case
- * where no process in the write operation has a
- * selection in the dataset's dataspace. This test is
- * to ensure that there are no assertion failures or
- * similar issues due to size 0 allocations and the
- * like. In this case, the file and dataset are created
- * but the dataset is populated with the default fill
- * value.
- *
- * Programmer: Jordan Henderson
- *             02/02/2017
+ * Tests parallel write of filtered data in the case where
+ * a dataset has a single unlimited dimension and each
+ * MPI rank writes to its own separate chunk. On each
+ * iteration, the dataset is extended in its extensible
+ * dimension by "MPI size" chunks per rank and the new
+ * chunks are written to, read back and verified.
  */
 static void
-test_write_filtered_dataset_all_no_selection(void)
+test_write_filtered_dataset_single_unlim_dim_no_overlap(const char *parent_group, H5Z_filter_t filter_id,
+                                                        hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id)
 {
-    C_DATATYPE *data        = NULL;
-    C_DATATYPE *read_buf    = NULL;
-    C_DATATYPE *correct_buf = NULL;
-    hsize_t     dataset_dims[WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     chunk_dims[WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     sel_dims[WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    size_t      i, data_size, correct_buf_size;
-    hid_t       file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t       filespace = -1, memspace = -1;
+    C_DATATYPE *data     = NULL;
+    C_DATATYPE *read_buf = NULL;
+    hsize_t     dataset_dims[WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     max_dims[WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     chunk_dims[WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     sel_dims[WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     start[WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     stride[WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     count[WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     block[WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS];
+    size_t      i, data_size;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
-        HDputs("Testing write to filtered chunks with all processes having no selection");
+        HDputs("Testing write to unshared filtered chunks w/ single unlimited dimension");
 
-    CHECK_CUR_FILTER_AVAIL();
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
+    VRFY((file_id >= 0), "Test file open succeeded");
 
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
+    /* Create the dataspace for the dataset */
+    dataset_dims[0] = (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_NROWS;
+    dataset_dims[1] = (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_NCOLS;
+    max_dims[0]     = dataset_dims[0];
+    max_dims[1]     = H5S_UNLIMITED;
+    chunk_dims[0]   = (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_CH_NROWS;
+    chunk_dims[1]   = (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_CH_NCOLS;
+    sel_dims[0]     = (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_CH_NROWS;
+    sel_dims[1]     = (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_NCOLS;
+
+    filespace = H5Screate_simple(WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS, dataset_dims, max_dims);
+    VRFY((filespace >= 0), "File dataspace creation succeeded");
 
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
+    /* Create chunked dataset */
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
-    VRFY((file_id >= 0), "Test file open succeeded");
+    VRFY((H5Pset_chunk(plist_id, WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS, chunk_dims) >= 0),
+         "Chunk size set");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    /* Add test filter to the pipeline */
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-    /* Create the dataspace for the dataset */
-    dataset_dims[0] = (hsize_t)WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_NROWS;
-    dataset_dims[1] = (hsize_t)WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_NCOLS;
-    chunk_dims[0]   = (hsize_t)WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS;
-    chunk_dims[1]   = (hsize_t)WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
-    sel_dims[0] = sel_dims[1] = 0;
-
-    filespace = H5Screate_simple(WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
-    VRFY((filespace >= 0), "File dataspace creation succeeded");
-
-    memspace = H5Screate_simple(WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, sel_dims, NULL);
-    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
-
-    /* Create chunked dataset */
-    plist_id = H5Pcreate(H5P_DATASET_CREATE);
-    VRFY((plist_id >= 0), "DCPL creation succeeded");
-
-    VRFY((H5Pset_chunk(plist_id, WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
-         "Chunk size set");
-
-    /* Add test filter to the pipeline */
-    VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
-
-    dset_id = H5Dcreate2(file_id, WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME,
-                         filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+    dset_id = H5Dcreate2(group_id, WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
+                         H5P_DEFAULT, plist_id, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
-    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
-
-    filespace = H5Dget_space(dset_id);
-    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
 
-    VRFY((H5Sselect_none(filespace) >= 0), "Select none succeeded");
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
     /* Fill data buffer */
-    data_size        = sel_dims[0] * sel_dims[1] * sizeof(*data);
-    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
+    data_size = sel_dims[0] * sel_dims[1] * sizeof(*data);
 
     data = (C_DATATYPE *)HDcalloc(1, data_size);
     VRFY((NULL != data), "HDcalloc succeeded");
 
-    correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != correct_buf), "HDcalloc succeeded");
+    read_buf = (C_DATATYPE *)HDcalloc(1, data_size);
+    VRFY((NULL != read_buf), "HDcalloc succeeded");
 
     for (i = 0; i < data_size / sizeof(*data); i++)
         data[i] = (C_DATATYPE)GEN_DATA(i);
 
-    /* Create property list for collective dataset write */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
+    for (i = 0; i < (size_t)WRITE_UNSHARED_ONE_UNLIM_DIM_NLOOPS; i++) {
+        /* Select hyperslab in the file */
+        filespace = H5Dget_space(dset_id);
+        VRFY((filespace >= 0), "File dataspace retrieval succeeded");
 
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
+        /* Each process defines the dataset selection in memory and writes
+         * it to the hyperslab in the file
+         */
+        count[0] = 1;
+        count[1] =
+            (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_NCOLS / (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_CH_NCOLS;
+        stride[0] = (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_CH_NROWS;
+        stride[1] = (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_CH_NCOLS;
+        block[0]  = (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_CH_NROWS;
+        block[1]  = (hsize_t)WRITE_UNSHARED_ONE_UNLIM_DIM_CH_NCOLS;
+        start[0]  = ((hsize_t)mpi_rank * block[0] * count[0]);
+        start[1]  = i * count[1] * block[1];
+
+        if (VERBOSE_MED) {
+            HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                     ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                     ", %" PRIuHSIZE " ]\n",
+                     mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0],
+                     block[1]);
+            HDfflush(stdout);
+        }
 
-    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0),
-         "Dataset write succeeded");
+        VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+             "Hyperslab selection succeeded");
 
-    if (data)
-        HDfree(data);
+        VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0),
+             "Dataset write succeeded");
 
-    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
 
-    /* Verify the correct data was written */
-    read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != read_buf), "HDcalloc succeeded");
+        VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
-    VRFY((dset_id >= 0), "Dataset open succeeded");
+        dset_id = H5Dopen2(group_id, WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_NAME, H5P_DEFAULT);
+        VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0),
-         "Dataset read succeeded");
+        HDmemset(read_buf, 255, data_size);
 
-    VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+        VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, read_buf) >= 0),
+             "Dataset read succeeded");
 
-    if (correct_buf)
-        HDfree(correct_buf);
+        /* Verify the correct data was written */
+        VRFY((0 == HDmemcmp(read_buf, data, data_size)), "Data verification succeeded");
+
+        if (i < (size_t)WRITE_UNSHARED_ONE_UNLIM_DIM_NLOOPS - 1) {
+            /* Not the last iteration: grow the extensible dimension by count[1] chunks */
+            dataset_dims[1] += count[1] * block[1];
+            VRFY((H5Dset_extent(dset_id, dataset_dims) >= 0), "H5Dset_extent succeeded");
+
+            /* Verify space allocation status (the extended region is not yet written) */
+            verify_space_alloc_status(dset_id, plist_id, SOME_CHUNKS_WRITTEN);
+        }
+
+        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+    }
+
+    if (data)
+        HDfree(data);
     if (read_buf)
         HDfree(read_buf);
 
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
-    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
-    VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
 
 /*
- * Tests parallel write of filtered data by using
- * point selections instead of hyperslab selections.
- *
- * Programmer: Jordan Henderson
- *             02/02/2017
+ * Tests parallel write of filtered data in the case where
+ * a dataset has a single unlimited dimension and each
+ * MPI rank writes to a portion of each chunk in the dataset.
+ * On each iteration, the dataset is extended in its extensible
+ * dimension by two chunks and the new chunks are written to
+ * by all ranks, then read back and verified.
  */
 static void
-test_write_filtered_dataset_point_selection(void)
+test_write_filtered_dataset_single_unlim_dim_overlap(const char *parent_group, H5Z_filter_t filter_id,
+                                                     hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id)
 {
-    C_DATATYPE *data        = NULL;
-    C_DATATYPE *correct_buf = NULL;
-    C_DATATYPE *read_buf    = NULL;
-    hsize_t *   coords      = NULL;
-    hsize_t     dataset_dims[WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     chunk_dims[WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     sel_dims[WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    size_t      i, j, data_size, correct_buf_size;
-    size_t      num_points;
-    hid_t       file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t       filespace = -1, memspace = -1;
+    C_DATATYPE *data     = NULL;
+    C_DATATYPE *read_buf = NULL;
+    hsize_t     dataset_dims[WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     max_dims[WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     chunk_dims[WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     sel_dims[WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     start[WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     stride[WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     count[WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     block[WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS];
+    size_t      i, data_size;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
-        HDputs("Testing write to filtered chunks with point selection");
-
-    CHECK_CUR_FILTER_AVAIL();
-
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
+        HDputs("Testing write to shared filtered chunks w/ single unlimited dimension");
 
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
-
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
     /* Create the dataspace for the dataset */
-    dataset_dims[0] = (hsize_t)WRITE_POINT_SELECTION_FILTERED_CHUNKS_NROWS;
-    dataset_dims[1] = (hsize_t)WRITE_POINT_SELECTION_FILTERED_CHUNKS_NCOLS;
-    chunk_dims[0]   = (hsize_t)WRITE_POINT_SELECTION_FILTERED_CHUNKS_CH_NROWS;
-    chunk_dims[1]   = (hsize_t)WRITE_POINT_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
-    sel_dims[0]     = (hsize_t)WRITE_POINT_SELECTION_FILTERED_CHUNKS_NROWS / (hsize_t)mpi_size;
-    sel_dims[1]     = (hsize_t)WRITE_POINT_SELECTION_FILTERED_CHUNKS_NCOLS;
+    dataset_dims[0] = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_NROWS;
+    dataset_dims[1] = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_NCOLS;
+    max_dims[0]     = dataset_dims[0];
+    max_dims[1]     = H5S_UNLIMITED;
+    chunk_dims[0]   = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_CH_NROWS;
+    chunk_dims[1]   = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_CH_NCOLS;
+    sel_dims[0]     = (hsize_t)DIM0_SCALE_FACTOR;
+    sel_dims[1]     = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_CH_NCOLS * (hsize_t)DIM1_SCALE_FACTOR;
 
-    filespace = H5Screate_simple(WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
+    filespace = H5Screate_simple(WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS, dataset_dims, max_dims);
     VRFY((filespace >= 0), "File dataspace creation succeeded");
 
-    memspace = H5Screate_simple(WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, sel_dims, NULL);
-    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
-
     /* Create chunked dataset */
-    plist_id = H5Pcreate(H5P_DATASET_CREATE);
-    VRFY((plist_id >= 0), "DCPL creation succeeded");
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-    VRFY((H5Pset_chunk(plist_id, WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
+    VRFY((H5Pset_chunk(plist_id, WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS, chunk_dims) >= 0),
          "Chunk size set");
 
     /* Add test filter to the pipeline */
-    VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-    dset_id = H5Dcreate2(file_id, WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME,
-                         filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+    dset_id = H5Dcreate2(group_id, WRITE_SHARED_ONE_UNLIM_DIM_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
+                         H5P_DEFAULT, plist_id, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
-    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
-
-    /* Set up point selection */
-    filespace = H5Dget_space(dset_id);
-    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
-
-    num_points = (hsize_t)WRITE_POINT_SELECTION_FILTERED_CHUNKS_NROWS *
-                 (hsize_t)WRITE_POINT_SELECTION_FILTERED_CHUNKS_NCOLS / (hsize_t)mpi_size;
-    coords = (hsize_t *)HDcalloc(1, 2 * num_points * sizeof(*coords));
-    VRFY((NULL != coords), "Coords HDcalloc succeeded");
-
-    for (i = 0; i < num_points; i++)
-        for (j = 0; j < WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS; j++)
-            coords[(i * WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS) + j] =
-                (j > 0) ? (i % (hsize_t)WRITE_POINT_SELECTION_FILTERED_CHUNKS_NCOLS)
-                        : ((hsize_t)mpi_rank +
-                           ((hsize_t)mpi_size * (i / (hsize_t)WRITE_POINT_SELECTION_FILTERED_CHUNKS_NCOLS)));
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
 
-    VRFY((H5Sselect_elements(filespace, H5S_SELECT_SET, (hsize_t)num_points, (const hsize_t *)coords) >= 0),
-         "Point selection succeeded");
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
     /* Fill data buffer */
-    data_size        = sel_dims[0] * sel_dims[1] * sizeof(*data);
-    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
+    data_size = sel_dims[0] * sel_dims[1] * sizeof(*data);
 
     data = (C_DATATYPE *)HDcalloc(1, data_size);
     VRFY((NULL != data), "HDcalloc succeeded");
 
-    correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != correct_buf), "HDcalloc succeeded");
+    read_buf = (C_DATATYPE *)HDcalloc(1, data_size);
+    VRFY((NULL != read_buf), "HDcalloc succeeded");
 
     for (i = 0; i < data_size / sizeof(*data); i++)
         data[i] = (C_DATATYPE)GEN_DATA(i);
 
-    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
-        correct_buf[i] = (C_DATATYPE)(
-            (dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
-            (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
+    for (i = 0; i < (size_t)WRITE_SHARED_ONE_UNLIM_DIM_NLOOPS; i++) {
+        /* Select hyperslab in the file */
+        filespace = H5Dget_space(dset_id);
+        VRFY((filespace >= 0), "File dataspace retrieval succeeded");
 
-    /* Create property list for collective dataset write */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
+        /* Each process defines the dataset selection in memory and writes
+         * it to the hyperslab in the file
+         */
+        count[0]  = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_NROWS / (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_CH_NROWS;
+        count[1]  = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_NCOLS / (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_CH_NCOLS;
+        stride[0] = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_CH_NROWS;
+        stride[1] = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_CH_NCOLS;
+        block[0]  = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_CH_NROWS / (hsize_t)mpi_size;
+        block[1]  = (hsize_t)WRITE_SHARED_ONE_UNLIM_DIM_CH_NCOLS;
+        start[0]  = (hsize_t)mpi_rank * block[0];
+        start[1]  = i * count[1] * block[1];
+
+        if (VERBOSE_MED) {
+            HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                     ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                     ", %" PRIuHSIZE " ]\n",
+                     mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0],
+                     block[1]);
+            HDfflush(stdout);
+        }
 
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
+        VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+             "Hyperslab selection succeeded");
 
-    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0),
-         "Dataset write succeeded");
+        VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0),
+             "Dataset write succeeded");
 
-    if (data)
-        HDfree(data);
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
 
-    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+        VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
-    /* Verify the correct data was written */
-    read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != read_buf), "HDcalloc succeeded");
+        dset_id = H5Dopen2(group_id, WRITE_SHARED_ONE_UNLIM_DIM_DATASET_NAME, H5P_DEFAULT);
+        VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
-    VRFY((dset_id >= 0), "Dataset open succeeded");
+        HDmemset(read_buf, 255, data_size);
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0),
-         "Dataset read succeeded");
+        VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, read_buf) >= 0),
+             "Dataset read succeeded");
 
-    VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+        /* Verify correct data was written */
+        VRFY((0 == HDmemcmp(read_buf, data, data_size)), "Data verification succeeded");
 
-    if (coords)
-        HDfree(coords);
-    if (correct_buf)
-        HDfree(correct_buf);
+        if (i < (size_t)WRITE_SHARED_ONE_UNLIM_DIM_NLOOPS - 1) {
+            /* Not the last iteration: grow the extensible dimension by count[1] chunks */
+            dataset_dims[1] += count[1] * block[1];
+            VRFY((H5Dset_extent(dset_id, dataset_dims) >= 0), "H5Dset_extent succeeded");
+
+            /* Verify space allocation status (the extended region is not yet written) */
+            verify_space_alloc_status(dset_id, plist_id, SOME_CHUNKS_WRITTEN);
+        }
+
+        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+    }
+
+    if (data)
+        HDfree(data);
     if (read_buf)
         HDfree(read_buf);
 
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
-    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
-    VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
@@ -1119,272 +1317,424 @@ test_write_filtered_dataset_point_selection(void)
 
 /*
  * Tests parallel write of filtered data in the case where
- * each process writes an equal amount of data to each chunk
- * in the dataset. Each chunk is distributed among the
- * processes in round-robin fashion by blocks of size 1 until
- * the whole chunk is selected, leading to an interleaved
- * write pattern.
- *
- * Programmer: Jordan Henderson
- *             02/02/2017
+ * a dataset has two unlimited dimensions and each
+ * MPI rank writes to its own separate chunks. On each
+ * iteration, the dataset is extended in its first
+ * extensible dimension by the size of one chunk per rank
+ * and in its second extensible dimension by the size of
+ * one chunk. Then, all chunks are written to, read back
+ * and verified.
  */
 static void
-test_write_filtered_dataset_interleaved_write(void)
+test_write_filtered_dataset_multi_unlim_dim_no_overlap(const char *parent_group, H5Z_filter_t filter_id,
+                                                       hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id)
 {
-    C_DATATYPE *data        = NULL;
-    C_DATATYPE *read_buf    = NULL;
-    C_DATATYPE *correct_buf = NULL;
-    hsize_t     dataset_dims[INTERLEAVED_WRITE_FILTERED_DATASET_DIMS];
-    hsize_t     chunk_dims[INTERLEAVED_WRITE_FILTERED_DATASET_DIMS];
-    hsize_t     sel_dims[INTERLEAVED_WRITE_FILTERED_DATASET_DIMS];
-    hsize_t     start[INTERLEAVED_WRITE_FILTERED_DATASET_DIMS];
-    hsize_t     stride[INTERLEAVED_WRITE_FILTERED_DATASET_DIMS];
-    hsize_t     count[INTERLEAVED_WRITE_FILTERED_DATASET_DIMS];
-    hsize_t     block[INTERLEAVED_WRITE_FILTERED_DATASET_DIMS];
-    size_t      i, data_size, correct_buf_size;
-    hid_t       file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t       filespace = -1, memspace = -1;
+    C_DATATYPE *data     = NULL;
+    C_DATATYPE *read_buf = NULL;
+    hsize_t     dataset_dims[WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     max_dims[WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     chunk_dims[WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     sel_dims[WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     start[WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     stride[WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     count[WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     block[WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS];
+    size_t      i, data_size;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
-        HDputs("Testing interleaved write to filtered chunks");
+        HDputs("Testing write to unshared filtered chunks w/ two unlimited dimensions");
 
-    CHECK_CUR_FILTER_AVAIL();
-
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
-
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
     /* Create the dataspace for the dataset */
-    dataset_dims[0] = (hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_NROWS;
-    dataset_dims[1] = (hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_NCOLS;
-    chunk_dims[0]   = (hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_CH_NROWS;
-    chunk_dims[1]   = (hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_CH_NCOLS;
-    sel_dims[0]     = (hsize_t)(INTERLEAVED_WRITE_FILTERED_DATASET_NROWS / mpi_size);
-    sel_dims[1]     = (hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_NCOLS;
-
-    filespace = H5Screate_simple(INTERLEAVED_WRITE_FILTERED_DATASET_DIMS, dataset_dims, NULL);
+    dataset_dims[0] = (hsize_t)WRITE_UNSHARED_TWO_UNLIM_DIM_NROWS;
+    dataset_dims[1] = (hsize_t)WRITE_UNSHARED_TWO_UNLIM_DIM_NCOLS;
+    max_dims[0]     = H5S_UNLIMITED;
+    max_dims[1]     = H5S_UNLIMITED;
+    chunk_dims[0]   = (hsize_t)WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NROWS;
+    chunk_dims[1]   = (hsize_t)WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NCOLS;
+    sel_dims[0]     = (hsize_t)WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NROWS;
+    sel_dims[1]     = (hsize_t)WRITE_UNSHARED_TWO_UNLIM_DIM_NCOLS;
+
+    filespace = H5Screate_simple(WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS, dataset_dims, max_dims);
     VRFY((filespace >= 0), "File dataspace creation succeeded");
 
-    memspace = H5Screate_simple(INTERLEAVED_WRITE_FILTERED_DATASET_DIMS, sel_dims, NULL);
-    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
-
     /* Create chunked dataset */
-    plist_id = H5Pcreate(H5P_DATASET_CREATE);
-    VRFY((plist_id >= 0), "DCPL creation succeeded");
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-    VRFY((H5Pset_chunk(plist_id, INTERLEAVED_WRITE_FILTERED_DATASET_DIMS, chunk_dims) >= 0),
+    VRFY((H5Pset_chunk(plist_id, WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS, chunk_dims) >= 0),
          "Chunk size set");
 
     /* Add test filter to the pipeline */
-    VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-    dset_id = H5Dcreate2(file_id, INTERLEAVED_WRITE_FILTERED_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
+    dset_id = H5Dcreate2(group_id, WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
                          H5P_DEFAULT, plist_id, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
-    /* Each process defines the dataset selection in memory and writes
-     * it to the hyperslab in the file
-     */
-    count[0] =
-        (hsize_t)(INTERLEAVED_WRITE_FILTERED_DATASET_NROWS / INTERLEAVED_WRITE_FILTERED_DATASET_CH_NROWS);
-    count[1] =
-        (hsize_t)(INTERLEAVED_WRITE_FILTERED_DATASET_NCOLS / INTERLEAVED_WRITE_FILTERED_DATASET_CH_NCOLS);
-    stride[0] = (hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_CH_NROWS;
-    stride[1] = (hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_CH_NCOLS;
-    block[0]  = 1;
-    block[1]  = (hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_CH_NCOLS;
-    start[0]  = (hsize_t)mpi_rank;
-    start[1]  = 0;
+    for (i = 0; i < (size_t)WRITE_UNSHARED_TWO_UNLIM_DIM_NLOOPS; i++) {
+        C_DATATYPE *tmp_realloc = NULL;
+        size_t      j;
 
-    if (VERBOSE_MED) {
-        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
-                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
-                 ", %" PRIuHSIZE " ]\n",
-                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
-        HDfflush(stdout);
-    }
+        /* Set selected dimensions */
+        sel_dims[0] = (i + 1) * WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NROWS;
+        sel_dims[1] = (i + 1) * WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NCOLS;
 
-    /* Select hyperslab in the file */
-    filespace = H5Dget_space(dset_id);
-    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+        /* Fill data buffer */
+        data_size = sel_dims[0] * sel_dims[1] * sizeof(*data);
 
-    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
-         "Hyperslab selection succeeded");
+        tmp_realloc = (C_DATATYPE *)HDrealloc(data, data_size);
+        VRFY((NULL != tmp_realloc), "HDrealloc succeeded");
+        data = tmp_realloc;
 
-    /* Fill data buffer */
-    data_size        = sel_dims[0] * sel_dims[1] * sizeof(*data);
-    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
+        tmp_realloc = (C_DATATYPE *)HDrealloc(read_buf, data_size);
+        VRFY((NULL != tmp_realloc), "HDrealloc succeeded");
+        read_buf = tmp_realloc;
 
-    data = (C_DATATYPE *)HDcalloc(1, data_size);
-    VRFY((NULL != data), "HDcalloc succeeded");
+        for (j = 0; j < data_size / sizeof(*data); j++)
+            data[j] = (C_DATATYPE)GEN_DATA(j);
 
-    correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != correct_buf), "HDcalloc succeeded");
+        /* Select hyperslab in the file */
+        filespace = H5Dget_space(dset_id);
+        VRFY((filespace >= 0), "File dataspace retrieval succeeded");
 
-    for (i = 0; i < data_size / sizeof(*data); i++)
-        data[i] = (C_DATATYPE)GEN_DATA(i);
+        /* Each process defines the dataset selection in memory and writes
+         * it to the hyperslab in the file
+         */
+        count[0]  = (i + 1);
+        count[1]  = (i + 1);
+        stride[0] = (hsize_t)WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NROWS;
+        stride[1] = (hsize_t)WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NCOLS;
+        block[0]  = (hsize_t)WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NROWS;
+        block[1]  = (hsize_t)WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NCOLS;
+        start[0]  = ((hsize_t)mpi_rank * block[0] * count[0]);
+        start[1]  = 0;
+
+        if (VERBOSE_MED) {
+            HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                     ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                     ", %" PRIuHSIZE " ]\n",
+                     mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0],
+                     block[1]);
+            HDfflush(stdout);
+        }
 
-    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
-        /* Add Column Index */
-        correct_buf[i] =
-            (C_DATATYPE)((i % (hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_NCOLS)
+        VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+             "Hyperslab selection succeeded");
 
-                         /* Add the Row Index */
-                         + ((i % (hsize_t)(mpi_size * INTERLEAVED_WRITE_FILTERED_DATASET_NCOLS)) /
-                            (hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_NCOLS)
+        VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0),
+             "Dataset write succeeded");
 
-                         /* Add the amount that gets added when a rank moves down to its next section
-                            vertically in the dataset */
-                         + ((hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_NCOLS *
-                            (i / (hsize_t)(mpi_size * INTERLEAVED_WRITE_FILTERED_DATASET_NCOLS))));
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
 
-    /* Create property list for collective dataset write */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
+        VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+
+        dset_id = H5Dopen2(group_id, WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_NAME, H5P_DEFAULT);
+        VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
+        HDmemset(read_buf, 255, data_size);
 
-    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0),
-         "Dataset write succeeded");
+        VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, read_buf) >= 0),
+             "Dataset read succeeded");
+
+        /* Verify the correct data was written */
+        VRFY((0 == HDmemcmp(read_buf, data, data_size)), "Data verification succeeded");
+
+        if (i < (size_t)WRITE_UNSHARED_TWO_UNLIM_DIM_NLOOPS - 1) {
+            /*
+             * Extend the dataset by the size of one chunk per rank
+             * in the first extensible dimension. Extend the dataset
+             * by the size of a chunk in the second extensible dimension.
+             */
+            dataset_dims[0] += (hsize_t)mpi_size * block[0];
+            dataset_dims[1] += block[1];
+            VRFY(H5Dset_extent(dset_id, dataset_dims) >= 0, "H5Dset_extent succeeded");
+
+            /* Verify space allocation status */
+            verify_space_alloc_status(dset_id, plist_id, SOME_CHUNKS_WRITTEN);
+        }
+
+        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+    }
 
     if (data)
         HDfree(data);
+    if (read_buf)
+        HDfree(read_buf);
 
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
+    VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
-    /* Verify the correct data was written */
-    read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != read_buf), "HDcalloc succeeded");
+    return;
+}
 
-    dset_id = H5Dopen2(file_id, "/" INTERLEAVED_WRITE_FILTERED_DATASET_NAME, H5P_DEFAULT);
-    VRFY((dset_id >= 0), "Dataset open succeeded");
+/*
+ * Tests parallel write of filtered data in the case where
+ * a dataset has two unlimited dimensions and each MPI
+ * rank writes to a portion of each chunk in the dataset.
+ * On each iteration, the dataset is extended in its extensible
+ * dimensions by the size of a chunk and then all chunks are
+ * written to by all ranks, then read back and verified.
+ */
+static void
+test_write_filtered_dataset_multi_unlim_dim_overlap(const char *parent_group, H5Z_filter_t filter_id,
+                                                    hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id)
+{
+    C_DATATYPE *data     = NULL;
+    C_DATATYPE *read_buf = NULL;
+    hsize_t     dataset_dims[WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     max_dims[WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     chunk_dims[WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     sel_dims[WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     start[WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     stride[WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     count[WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS];
+    hsize_t     block[WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS];
+    size_t      i, data_size;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID;
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0),
-         "Dataset read succeeded");
+    if (MAINPROCESS)
+        HDputs("Testing write to shared filtered chunks w/ two unlimited dimensions");
 
-    VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
+    VRFY((file_id >= 0), "Test file open succeeded");
 
-    if (correct_buf)
-        HDfree(correct_buf);
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
+    /* Create the dataspace for the dataset */
+    dataset_dims[0] = (hsize_t)WRITE_SHARED_TWO_UNLIM_DIM_NROWS;
+    dataset_dims[1] = (hsize_t)WRITE_SHARED_TWO_UNLIM_DIM_NCOLS;
+    max_dims[0]     = H5S_UNLIMITED;
+    max_dims[1]     = H5S_UNLIMITED;
+    chunk_dims[0]   = (hsize_t)WRITE_SHARED_TWO_UNLIM_DIM_CH_NROWS;
+    chunk_dims[1]   = (hsize_t)WRITE_SHARED_TWO_UNLIM_DIM_CH_NCOLS;
+    sel_dims[0]     = (hsize_t)DIM0_SCALE_FACTOR;
+    sel_dims[1]     = (hsize_t)WRITE_SHARED_TWO_UNLIM_DIM_CH_NCOLS * (hsize_t)DIM1_SCALE_FACTOR;
+
+    filespace = H5Screate_simple(WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS, dataset_dims, max_dims);
+    VRFY((filespace >= 0), "File dataspace creation succeeded");
+
+    /* Create chunked dataset */
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
+
+    VRFY((H5Pset_chunk(plist_id, WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS, chunk_dims) >= 0),
+         "Chunk size set");
+
+    /* Add test filter to the pipeline */
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
+
+    dset_id = H5Dcreate2(group_id, WRITE_SHARED_TWO_UNLIM_DIM_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
+                         H5P_DEFAULT, plist_id, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset creation succeeded");
+
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+
+    for (i = 0; i < (size_t)WRITE_SHARED_TWO_UNLIM_DIM_NLOOPS; i++) {
+        C_DATATYPE *tmp_realloc = NULL;
+        size_t      j;
+
+        /* Set selected dimensions */
+        sel_dims[0] = (i + 1);
+        sel_dims[1] = (i + 1) * (size_t)WRITE_SHARED_TWO_UNLIM_DIM_CH_NCOLS;
+
+        /* Fill data buffer */
+        data_size = sel_dims[0] * sel_dims[1] * sizeof(*data);
+
+        tmp_realloc = (C_DATATYPE *)HDrealloc(data, data_size);
+        VRFY((NULL != tmp_realloc), "HDrealloc succeeded");
+        data = tmp_realloc;
+
+        tmp_realloc = (C_DATATYPE *)HDrealloc(read_buf, data_size);
+        VRFY((NULL != tmp_realloc), "HDrealloc succeeded");
+        read_buf = tmp_realloc;
+
+        for (j = 0; j < data_size / sizeof(*data); j++)
+            data[j] = (C_DATATYPE)GEN_DATA(j);
+
+        /* Select hyperslab in the file */
+        filespace = H5Dget_space(dset_id);
+        VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+
+        /* Each rank selects one row (offset by its rank) that is a full chunk
+         * wide in each selected chunk; the selection must hold sel_dims[0] *
+         * sel_dims[1] elements because the H5S_BLOCK buffers are sized from it */
+        count[0]  = (i + 1);
+        count[1]  = (i + 1);
+        stride[0] = (hsize_t)WRITE_SHARED_TWO_UNLIM_DIM_CH_NROWS;
+        stride[1] = (hsize_t)WRITE_SHARED_TWO_UNLIM_DIM_CH_NCOLS;
+        block[0]  = 1;
+        block[1]  = (hsize_t)WRITE_SHARED_TWO_UNLIM_DIM_CH_NCOLS;
+        start[0]  = (hsize_t)mpi_rank;
+        start[1]  = 0;
+
+        if (VERBOSE_MED) {
+            HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                     ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                     ", %" PRIuHSIZE " ]\n",
+                     mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0],
+                     block[1]);
+            HDfflush(stdout);
+        }
+
+        VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+             "Hyperslab selection succeeded");
+
+        VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0),
+             "Dataset write succeeded");
+
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
+        VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+
+        dset_id = H5Dopen2(group_id, WRITE_SHARED_TWO_UNLIM_DIM_DATASET_NAME, H5P_DEFAULT);
+        VRFY((dset_id >= 0), "Dataset open succeeded");
+
+        HDmemset(read_buf, 255, data_size);
+
+        VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, read_buf) >= 0),
+             "Dataset read succeeded");
+
+        /* Verify correct data was written */
+        VRFY((0 == HDmemcmp(read_buf, data, data_size)), "Data verification succeeded");
+
+        if (i < (size_t)WRITE_SHARED_TWO_UNLIM_DIM_NLOOPS - 1) {
+            /* Extend the dataset by the size of a chunk in each extensible dimension */
+            dataset_dims[0] += (hsize_t)WRITE_SHARED_TWO_UNLIM_DIM_CH_NROWS;
+            dataset_dims[1] += (hsize_t)WRITE_SHARED_TWO_UNLIM_DIM_CH_NCOLS;
+            VRFY(H5Dset_extent(dset_id, dataset_dims) >= 0, "H5Dset_extent succeeded");
+
+            /* Verify space allocation status */
+            verify_space_alloc_status(dset_id, plist_id, SOME_CHUNKS_WRITTEN);
+        }
+
+        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+    }
+
+    if (data)
+        HDfree(data);
     if (read_buf)
         HDfree(read_buf);
 
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
-    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
-    VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
 
 /*
- * Tests parallel write of transformed and filtered data
- * in the case where only one process is writing to a
- * particular chunk in the operation. Normally, a data
- * transform function will cause the parallel library to
- * break to independent I/O and this isn't allowed when
- * there are filters in the pipeline. However, in this
- * case the parallel library recognizes that the used
- * data transform function "x" is the same as not applying
- * the transform function. Therefore it does not apply
- * the transform function resulting in not breaking to
- * independent I/O.
+ * Tests parallel write of filtered data in the case where
+ * a single process in the write operation has no selection
+ * in the dataset's dataspace. In this case, the process with
+ * no selection still has to participate in the collective
+ * space re-allocation for the filtered chunks and also must
+ * participate in the re-insertion of the filtered chunks
+ * into the chunk index.
  *
- * Programmer: Jan-Willem Blokland
- *             08/20/2021
+ * Programmer: Jordan Henderson
+ *             02/01/2017
  */
 static void
-test_write_transformed_filtered_dataset_no_overlap(void)
+test_write_filtered_dataset_single_no_selection(const char *parent_group, H5Z_filter_t filter_id,
+                                                hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id)
 {
     C_DATATYPE *data        = NULL;
     C_DATATYPE *read_buf    = NULL;
     C_DATATYPE *correct_buf = NULL;
-    hsize_t     dataset_dims[WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     chunk_dims[WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     sel_dims[WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     start[WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     stride[WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     count[WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     block[WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     dataset_dims[WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     chunk_dims[WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     sel_dims[WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     start[WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     stride[WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     count[WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     block[WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
     size_t      i, data_size, correct_buf_size;
-    hid_t       file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t       filespace = -1, memspace = -1;
+    size_t      segment_length;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
-        HDputs("Testing write to unshared transformed and filtered chunks");
-
-    CHECK_CUR_FILTER_AVAIL();
-
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
+        HDputs("Testing write to filtered chunks with a single process having no selection");
 
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
     /* Create the dataspace for the dataset */
-    dataset_dims[0] = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NROWS;
-    dataset_dims[1] = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NCOLS;
-    chunk_dims[0]   = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS;
-    chunk_dims[1]   = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NCOLS;
-    sel_dims[0]     = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS;
-    sel_dims[1]     = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NCOLS;
+    dataset_dims[0] = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NROWS;
+    dataset_dims[1] = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NCOLS;
+    chunk_dims[0]   = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS;
+    chunk_dims[1]   = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
+    sel_dims[0]     = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS;
+    sel_dims[1]     = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NCOLS;
 
-    filespace = H5Screate_simple(WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
+    if (mpi_rank == WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NO_SELECT_PROC)
+        sel_dims[0] = sel_dims[1] = 0;
+
+    filespace = H5Screate_simple(WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
     VRFY((filespace >= 0), "File dataspace creation succeeded");
 
-    memspace = H5Screate_simple(WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS, sel_dims, NULL);
+    memspace = H5Screate_simple(WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, sel_dims, NULL);
     VRFY((memspace >= 0), "Memory dataspace creation succeeded");
 
     /* Create chunked dataset */
-    plist_id = H5Pcreate(H5P_DATASET_CREATE);
-    VRFY((plist_id >= 0), "DCPL creation succeeded");
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-    VRFY((H5Pset_chunk(plist_id, WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
+    VRFY((H5Pset_chunk(plist_id, WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
          "Chunk size set");
 
     /* Add test filter to the pipeline */
-    VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-    dset_id = H5Dcreate2(file_id, WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME,
+    dset_id = H5Dcreate2(group_id, WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME,
                          filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
     /* Each process defines the dataset selection in memory and writes
      * it to the hyperslab in the file
      */
     count[0] = 1;
-    count[1] = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NCOLS /
-               (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NCOLS;
-    stride[0] = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS;
-    stride[1] = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NCOLS;
-    block[0]  = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS;
-    block[1]  = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NCOLS;
-    start[0]  = ((hsize_t)mpi_rank * (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS * count[0]);
+    count[1] = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NCOLS /
+               (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
+    stride[0] = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS;
+    stride[1] = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
+    block[0]  = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS;
+    block[1]  = (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
+    start[0]  = (hsize_t)mpi_rank * (hsize_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS * count[0];
     start[1]  = 0;
 
     if (VERBOSE_MED) {
@@ -1399,38 +1749,43 @@ test_write_transformed_filtered_dataset_no_overlap(void)
     filespace = H5Dget_space(dset_id);
     VRFY((filespace >= 0), "File dataspace retrieval succeeded");
 
-    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
-         "Hyperslab selection succeeded");
+    if (mpi_rank == WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NO_SELECT_PROC)
+        VRFY((H5Sselect_none(filespace) >= 0), "Select none succeeded");
+    else
+        VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+             "Hyperslab selection succeeded");
 
     /* Fill data buffer */
     data_size        = sel_dims[0] * sel_dims[1] * sizeof(*data);
     correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
 
-    data = (C_DATATYPE *)HDcalloc(1, data_size);
-    VRFY((NULL != data), "HDcalloc succeeded");
+    if (mpi_rank != WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NO_SELECT_PROC) {
+        data = (C_DATATYPE *)HDcalloc(1, data_size);
+        VRFY((NULL != data), "HDcalloc succeeded");
+
+        for (i = 0; i < data_size / sizeof(*data); i++)
+            data[i] = (C_DATATYPE)GEN_DATA(i);
+    }
 
     correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != correct_buf), "HDcalloc succeeded");
 
-    for (i = 0; i < data_size / sizeof(*data); i++)
-        data[i] = (C_DATATYPE)GEN_DATA(i);
-
     for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
         correct_buf[i] = (C_DATATYPE)((i % (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])) +
                                       (i / (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])));
 
-    /* Create property list for collective dataset write and data transform */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
-
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
-
-    /* Set data transform expression */
-    VRFY((H5Pset_data_transform(plist_id, "x") >= 0), "Set data transform expression succeeded");
+    /* Compute the correct offset into the buffer for the process having no selection and clear it */
+    segment_length = dataset_dims[0] * dataset_dims[1] / (hsize_t)mpi_size;
+    HDmemset(correct_buf +
+                 ((size_t)WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NO_SELECT_PROC * segment_length),
+             0, segment_length * sizeof(*data));
 
-    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0),
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0),
          "Dataset write succeeded");
 
+    /* Verify space allocation status - data should only have been written if MPI size > 1 */
+    verify_space_alloc_status(dset_id, plist_id, (mpi_size > 1 ? SOME_CHUNKS_WRITTEN : NO_CHUNKS_WRITTEN));
+
     if (data)
         HDfree(data);
 
@@ -1440,10 +1795,10 @@ test_write_transformed_filtered_dataset_no_overlap(void)
     read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
+    dset_id = H5Dopen2(group_id, WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0),
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
          "Dataset read succeeded");
 
     VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
@@ -1453,131 +1808,93 @@ test_write_transformed_filtered_dataset_no_overlap(void)
     if (read_buf)
         HDfree(read_buf);
 
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
 
 /*
- * Tests parallel write of filtered data in the case where
- * the dataset has 3 dimensions and each process writes
- * to its own "page" in the 3rd dimension.
+ * Tests parallel write of filtered data in the case
+ * where no process in the write operation has a
+ * selection in the dataset's dataspace. This test is
+ * to ensure that there are no assertion failures or
+ * similar issues due to size 0 allocations and the
+ * like. In this case, the file and dataset are created
+ * but the dataset is populated with the default fill
+ * value.
  *
  * Programmer: Jordan Henderson
- *             02/06/2017
+ *             02/02/2017
  */
 static void
-test_write_3d_filtered_dataset_no_overlap_separate_pages(void)
+test_write_filtered_dataset_all_no_selection(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                             hid_t dcpl_id, hid_t dxpl_id)
 {
     C_DATATYPE *data        = NULL;
     C_DATATYPE *read_buf    = NULL;
     C_DATATYPE *correct_buf = NULL;
-    hsize_t     dataset_dims[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
-    hsize_t     chunk_dims[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
-    hsize_t     sel_dims[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
-    hsize_t     start[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
-    hsize_t     stride[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
-    hsize_t     count[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
-    hsize_t     block[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
+    hsize_t     dataset_dims[WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     chunk_dims[WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     sel_dims[WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
     size_t      i, data_size, correct_buf_size;
-    hid_t       file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t       filespace = -1, memspace = -1;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
-        HDputs("Testing write to unshared filtered chunks on separate pages in 3D dataset");
-
-    CHECK_CUR_FILTER_AVAIL();
-
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
+        HDputs("Testing write to filtered chunks with all processes having no selection");
 
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
     /* Create the dataspace for the dataset */
-    dataset_dims[0] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NROWS;
-    dataset_dims[1] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NCOLS;
-    dataset_dims[2] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DEPTH;
-    chunk_dims[0]   = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NROWS;
-    chunk_dims[1]   = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NCOLS;
-    chunk_dims[2]   = 1;
-    sel_dims[0]     = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NROWS;
-    sel_dims[1]     = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NCOLS;
-    sel_dims[2]     = 1;
+    dataset_dims[0] = (hsize_t)WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_NROWS;
+    dataset_dims[1] = (hsize_t)WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_NCOLS;
+    chunk_dims[0]   = (hsize_t)WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS;
+    chunk_dims[1]   = (hsize_t)WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
+    sel_dims[0] = sel_dims[1] = 0;
 
-    filespace = H5Screate_simple(WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS, dataset_dims, NULL);
+    filespace = H5Screate_simple(WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
     VRFY((filespace >= 0), "File dataspace creation succeeded");
 
-    memspace = H5Screate_simple(WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS, sel_dims, NULL);
+    memspace = H5Screate_simple(WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, sel_dims, NULL);
     VRFY((memspace >= 0), "Memory dataspace creation succeeded");
 
     /* Create chunked dataset */
-    plist_id = H5Pcreate(H5P_DATASET_CREATE);
-    VRFY((plist_id >= 0), "DCPL creation succeeded");
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-    VRFY((H5Pset_chunk(plist_id, WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS, chunk_dims) >= 0),
+    VRFY((H5Pset_chunk(plist_id, WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
          "Chunk size set");
 
     /* Add test filter to the pipeline */
-    VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-    dset_id = H5Dcreate2(file_id, WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_NAME, HDF5_DATATYPE_NAME,
+    dset_id = H5Dcreate2(group_id, WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME,
                          filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
-    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
-
-    /* Each process defines the dataset selection in memory and writes
-     * it to the hyperslab in the file
-     */
-    count[0] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NROWS /
-               (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NROWS;
-    count[1] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NCOLS /
-               (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NCOLS;
-    count[2]  = 1;
-    stride[0] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NROWS;
-    stride[1] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NCOLS;
-    stride[2] = 1;
-    block[0]  = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NROWS;
-    block[1]  = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NCOLS;
-    block[2]  = 1;
-    start[0]  = 0;
-    start[1]  = 0;
-    start[2]  = (hsize_t)mpi_rank;
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
 
-    if (VERBOSE_MED) {
-        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE
-                 " ], stride[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE
-                 ", %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE
-                 " ]\n",
-                 mpi_rank, count[0], count[1], count[2], stride[0], stride[1], stride[2], start[0], start[1],
-                 start[2], block[0], block[1], block[2]);
-        HDfflush(stdout);
-    }
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
-    /* Select hyperslab in the file */
     filespace = H5Dget_space(dset_id);
     VRFY((filespace >= 0), "File dataspace retrieval succeeded");
 
-    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
-         "Hyperslab selection succeeded");
+    VRFY((H5Sselect_none(filespace) >= 0), "Select none succeeded");
 
     /* Fill data buffer */
-    data_size        = sel_dims[0] * sel_dims[1] * sel_dims[2] * sizeof(*data);
-    correct_buf_size = dataset_dims[0] * dataset_dims[1] * dataset_dims[2] * sizeof(*correct_buf);
+    data_size        = sel_dims[0] * sel_dims[1] * sizeof(*data);
+    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
 
     data = (C_DATATYPE *)HDcalloc(1, data_size);
     VRFY((NULL != data), "HDcalloc succeeded");
@@ -1588,18 +1905,12 @@ test_write_3d_filtered_dataset_no_overlap_separate_pages(void)
     for (i = 0; i < data_size / sizeof(*data); i++)
         data[i] = (C_DATATYPE)GEN_DATA(i);
 
-    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
-        correct_buf[i] = (C_DATATYPE)((i % (hsize_t)mpi_size) + (i / (hsize_t)mpi_size));
-
-    /* Create property list for collective dataset write */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
-
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
-
-    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0),
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0),
          "Dataset write succeeded");
 
+    /* Verify space allocation status - no ranks should have written any data */
+    verify_space_alloc_status(dset_id, plist_id, NO_CHUNKS_WRITTEN);
+
     if (data)
         HDfree(data);
 
@@ -1609,10 +1920,10 @@ test_write_3d_filtered_dataset_no_overlap_separate_pages(void)
     read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_NAME, H5P_DEFAULT);
+    dset_id = H5Dopen2(group_id, WRITE_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0),
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
          "Dataset read succeeded");
 
     VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
@@ -1622,132 +1933,104 @@ test_write_3d_filtered_dataset_no_overlap_separate_pages(void)
     if (read_buf)
         HDfree(read_buf);
 
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
 
 /*
- * Tests parallel write of filtered data in the case where
- * the dataset has 3 dimensions and each process writes
- * to each "page" in the 3rd dimension. However, no chunk
- * on a given "page" is written to by more than one process.
+ * Tests parallel write of filtered data by using
+ * point selections instead of hyperslab selections.
  *
  * Programmer: Jordan Henderson
- *             02/06/2017
+ *             02/02/2017
  */
 static void
-test_write_3d_filtered_dataset_no_overlap_same_pages(void)
+test_write_filtered_dataset_point_selection(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                            hid_t dcpl_id, hid_t dxpl_id)
 {
     C_DATATYPE *data        = NULL;
-    C_DATATYPE *read_buf    = NULL;
     C_DATATYPE *correct_buf = NULL;
-    hsize_t     dataset_dims[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
-    hsize_t     chunk_dims[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
-    hsize_t     sel_dims[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
-    hsize_t     start[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
-    hsize_t     stride[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
-    hsize_t     count[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
-    hsize_t     block[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
-    size_t      i, data_size, correct_buf_size;
-    hid_t       file_id, dset_id, plist_id;
-    hid_t       filespace, memspace;
+    C_DATATYPE *read_buf    = NULL;
+    hsize_t *   coords      = NULL;
+    hsize_t     dataset_dims[WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     chunk_dims[WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     sel_dims[WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    size_t      i, j, data_size, correct_buf_size;
+    size_t      num_points;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
-        HDputs("Testing write to unshared filtered chunks on the same pages in 3D dataset");
-
-    CHECK_CUR_FILTER_AVAIL();
-
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
+        HDputs("Testing write to filtered chunks with point selection");
 
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
     /* Create the dataspace for the dataset */
-    dataset_dims[0] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_NROWS;
-    dataset_dims[1] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_NCOLS;
-    dataset_dims[2] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DEPTH;
-    chunk_dims[0]   = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NROWS;
-    chunk_dims[1]   = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NCOLS;
-    chunk_dims[2]   = 1;
-    sel_dims[0]     = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NROWS;
-    sel_dims[1]     = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_NCOLS;
-    sel_dims[2]     = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DEPTH;
+    dataset_dims[0] = (hsize_t)WRITE_POINT_SELECTION_FILTERED_CHUNKS_NROWS;
+    dataset_dims[1] = (hsize_t)WRITE_POINT_SELECTION_FILTERED_CHUNKS_NCOLS;
+    chunk_dims[0]   = (hsize_t)WRITE_POINT_SELECTION_FILTERED_CHUNKS_CH_NROWS;
+    chunk_dims[1]   = (hsize_t)WRITE_POINT_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
+    sel_dims[0]     = (hsize_t)WRITE_POINT_SELECTION_FILTERED_CHUNKS_NROWS / (hsize_t)mpi_size;
+    sel_dims[1]     = (hsize_t)WRITE_POINT_SELECTION_FILTERED_CHUNKS_NCOLS;
 
-    filespace =
-        H5Screate_simple(WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS, dataset_dims, NULL);
+    filespace = H5Screate_simple(WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
     VRFY((filespace >= 0), "File dataspace creation succeeded");
 
-    memspace = H5Screate_simple(WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS, sel_dims, NULL);
+    memspace = H5Screate_simple(WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, sel_dims, NULL);
     VRFY((memspace >= 0), "Memory dataspace creation succeeded");
 
     /* Create chunked dataset */
-    plist_id = H5Pcreate(H5P_DATASET_CREATE);
-    VRFY((plist_id >= 0), "DCPL creation succeeded");
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-    VRFY((H5Pset_chunk(plist_id, WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS, chunk_dims) >= 0),
+    VRFY((H5Pset_chunk(plist_id, WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
          "Chunk size set");
 
     /* Add test filter to the pipeline */
-    VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-    dset_id = H5Dcreate2(file_id, WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_NAME,
-                         HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+    dset_id = H5Dcreate2(group_id, WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME,
+                         filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
-    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
-
-    /* Each process defines the dataset selection in memory and writes
-     * it to the hyperslab in the file
-     */
-    count[0] = 1;
-    count[1] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_NCOLS /
-               (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NCOLS;
-    count[2]  = (hsize_t)mpi_size;
-    stride[0] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NROWS;
-    stride[1] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NCOLS;
-    stride[2] = 1;
-    block[0]  = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NROWS;
-    block[1]  = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NCOLS;
-    block[2]  = 1;
-    start[0] = ((hsize_t)mpi_rank * (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NROWS * count[0]);
-    start[1] = 0;
-    start[2] = 0;
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
 
-    if (VERBOSE_MED) {
-        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE
-                 " ], stride[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE
-                 ", %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE
-                 " ]\n",
-                 mpi_rank, count[0], count[1], count[2], stride[0], stride[1], stride[2], start[0], start[1],
-                 start[2], block[0], block[1], block[2]);
-        HDfflush(stdout);
-    }
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
-    /* Select hyperslab in the file */
+    /* Set up point selection */
     filespace = H5Dget_space(dset_id);
     VRFY((filespace >= 0), "File dataspace retrieval succeeded");
 
-    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
-         "Hyperslab selection succeeded");
+    num_points = (hsize_t)WRITE_POINT_SELECTION_FILTERED_CHUNKS_NROWS *
+                 (hsize_t)WRITE_POINT_SELECTION_FILTERED_CHUNKS_NCOLS / (hsize_t)mpi_size;
+    coords = (hsize_t *)HDcalloc(1, 2 * num_points * sizeof(*coords));
+    VRFY((NULL != coords), "Coords HDcalloc succeeded");
+
+    for (i = 0; i < num_points; i++)
+        for (j = 0; j < WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS; j++)
+            coords[(i * WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS) + j] =
+                (j > 0) ? (i % (hsize_t)WRITE_POINT_SELECTION_FILTERED_CHUNKS_NCOLS)
+                        : ((hsize_t)mpi_rank +
+                           ((hsize_t)mpi_size * (i / (hsize_t)WRITE_POINT_SELECTION_FILTERED_CHUNKS_NCOLS)));
+
+    VRFY((H5Sselect_elements(filespace, H5S_SELECT_SET, (hsize_t)num_points, (const hsize_t *)coords) >= 0),
+         "Point selection succeeded");
 
     /* Fill data buffer */
-    data_size        = sel_dims[0] * sel_dims[1] * sel_dims[2] * sizeof(*data);
-    correct_buf_size = dataset_dims[0] * dataset_dims[1] * dataset_dims[2] * sizeof(*correct_buf);
+    data_size        = sel_dims[0] * sel_dims[1] * sizeof(*data);
+    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
 
     data = (C_DATATYPE *)HDcalloc(1, data_size);
     VRFY((NULL != data), "HDcalloc succeeded");
@@ -1759,18 +2042,16 @@ test_write_3d_filtered_dataset_no_overlap_same_pages(void)
         data[i] = (C_DATATYPE)GEN_DATA(i);
 
     for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
-        correct_buf[i] = (C_DATATYPE)((i % (dataset_dims[0] * dataset_dims[1])) +
-                                      (i / (dataset_dims[0] * dataset_dims[1])));
-
-    /* Create property list for collective dataset write */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
-
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
+        correct_buf[i] = (C_DATATYPE)(
+            (dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
+            (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
 
-    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0),
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0),
          "Dataset write succeeded");
 
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
     if (data)
         HDfree(data);
 
@@ -1780,23 +2061,26 @@ test_write_3d_filtered_dataset_no_overlap_same_pages(void)
     read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_NAME, H5P_DEFAULT);
+    dset_id = H5Dopen2(group_id, WRITE_POINT_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0),
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
          "Dataset read succeeded");
 
     VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
 
+    if (coords)
+        HDfree(coords);
     if (correct_buf)
         HDfree(correct_buf);
     if (read_buf)
         HDfree(read_buf);
 
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
@@ -1804,106 +2088,95 @@ test_write_3d_filtered_dataset_no_overlap_same_pages(void)
 
 /*
  * Tests parallel write of filtered data in the case where
- * the dataset has 3 dimensions and each process writes
- * to each "page" in the 3rd dimension. Further, each chunk
- * in each "page" is written to equally by all processes.
+ * each process writes an equal amount of data to each chunk
+ * in the dataset. The rows of each chunk are distributed among
+ * the processes in round-robin fashion, one row per block, until
+ * the whole chunk is selected, leading to an interleaved
+ * write pattern.
  *
  * Programmer: Jordan Henderson
- *             02/06/2017
+ *             02/02/2017
  */
 static void
-test_write_3d_filtered_dataset_overlap(void)
+test_write_filtered_dataset_interleaved_write(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                              hid_t dcpl_id, hid_t dxpl_id)
 {
     C_DATATYPE *data        = NULL;
     C_DATATYPE *read_buf    = NULL;
     C_DATATYPE *correct_buf = NULL;
-    hsize_t     dataset_dims[WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
-    hsize_t     chunk_dims[WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
-    hsize_t     sel_dims[WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
-    hsize_t     start[WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
-    hsize_t     stride[WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
-    hsize_t     count[WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
-    hsize_t     block[WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
+    hsize_t     dataset_dims[INTERLEAVED_WRITE_FILTERED_DATASET_DIMS];
+    hsize_t     chunk_dims[INTERLEAVED_WRITE_FILTERED_DATASET_DIMS];
+    hsize_t     sel_dims[INTERLEAVED_WRITE_FILTERED_DATASET_DIMS];
+    hsize_t     start[INTERLEAVED_WRITE_FILTERED_DATASET_DIMS];
+    hsize_t     stride[INTERLEAVED_WRITE_FILTERED_DATASET_DIMS];
+    hsize_t     count[INTERLEAVED_WRITE_FILTERED_DATASET_DIMS];
+    hsize_t     block[INTERLEAVED_WRITE_FILTERED_DATASET_DIMS];
     size_t      i, data_size, correct_buf_size;
-    hid_t       file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t       filespace = -1, memspace = -1;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
-        HDputs("Testing write to shared filtered chunks in 3D dataset");
-
-    CHECK_CUR_FILTER_AVAIL();
-
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
+        HDputs("Testing interleaved write to filtered chunks");
 
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
     /* Create the dataspace for the dataset */
-    dataset_dims[0] = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_NROWS;
-    dataset_dims[1] = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_NCOLS;
-    dataset_dims[2] = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_DEPTH;
-    chunk_dims[0]   = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_CH_NROWS;
-    chunk_dims[1]   = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_CH_NCOLS;
-    chunk_dims[2]   = 1;
-    sel_dims[0]     = (hsize_t)(WRITE_SHARED_FILTERED_CHUNKS_3D_NROWS / mpi_size);
-    sel_dims[1]     = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_NCOLS;
-    sel_dims[2]     = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_DEPTH;
+    dataset_dims[0] = (hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_NROWS;
+    dataset_dims[1] = (hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_NCOLS;
+    chunk_dims[0]   = (hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_CH_NROWS;
+    chunk_dims[1]   = (hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_CH_NCOLS;
+    sel_dims[0]     = (hsize_t)(INTERLEAVED_WRITE_FILTERED_DATASET_NROWS / mpi_size);
+    sel_dims[1]     = (hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_NCOLS;
 
-    filespace = H5Screate_simple(WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS, dataset_dims, NULL);
+    filespace = H5Screate_simple(INTERLEAVED_WRITE_FILTERED_DATASET_DIMS, dataset_dims, NULL);
     VRFY((filespace >= 0), "File dataspace creation succeeded");
 
-    memspace = H5Screate_simple(WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS, sel_dims, NULL);
+    memspace = H5Screate_simple(INTERLEAVED_WRITE_FILTERED_DATASET_DIMS, sel_dims, NULL);
     VRFY((memspace >= 0), "Memory dataspace creation succeeded");
 
     /* Create chunked dataset */
-    plist_id = H5Pcreate(H5P_DATASET_CREATE);
-    VRFY((plist_id >= 0), "DCPL creation succeeded");
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-    VRFY((H5Pset_chunk(plist_id, WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS, chunk_dims) >= 0),
+    VRFY((H5Pset_chunk(plist_id, INTERLEAVED_WRITE_FILTERED_DATASET_DIMS, chunk_dims) >= 0),
          "Chunk size set");
 
     /* Add test filter to the pipeline */
-    VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-    dset_id = H5Dcreate2(file_id, WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
+    dset_id = H5Dcreate2(group_id, INTERLEAVED_WRITE_FILTERED_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
                          H5P_DEFAULT, plist_id, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
     /* Each process defines the dataset selection in memory and writes
      * it to the hyperslab in the file
      */
-    count[0]  = (hsize_t)(WRITE_SHARED_FILTERED_CHUNKS_3D_NROWS / WRITE_SHARED_FILTERED_CHUNKS_3D_CH_NROWS);
-    count[1]  = (hsize_t)(WRITE_SHARED_FILTERED_CHUNKS_3D_NCOLS / WRITE_SHARED_FILTERED_CHUNKS_3D_CH_NCOLS);
-    count[2]  = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_DEPTH;
-    stride[0] = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_CH_NROWS;
-    stride[1] = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_CH_NCOLS;
-    stride[2] = 1;
+    count[0] =
+        (hsize_t)(INTERLEAVED_WRITE_FILTERED_DATASET_NROWS / INTERLEAVED_WRITE_FILTERED_DATASET_CH_NROWS);
+    count[1] =
+        (hsize_t)(INTERLEAVED_WRITE_FILTERED_DATASET_NCOLS / INTERLEAVED_WRITE_FILTERED_DATASET_CH_NCOLS);
+    stride[0] = (hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_CH_NROWS;
+    stride[1] = (hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_CH_NCOLS;
     block[0]  = 1;
-    block[1]  = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_CH_NCOLS;
-    block[2]  = 1;
+    block[1]  = (hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_CH_NCOLS;
     start[0]  = (hsize_t)mpi_rank;
     start[1]  = 0;
-    start[2]  = 0;
 
     if (VERBOSE_MED) {
-        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE
-                 " ], stride[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE
-                 ", %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE
-                 " ]\n",
-                 mpi_rank, count[0], count[1], count[2], stride[0], stride[1], stride[2], start[0], start[1],
-                 start[2], block[0], block[1], block[2]);
+        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ]\n",
+                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
         HDfflush(stdout);
     }
 
@@ -1915,8 +2188,8 @@ test_write_3d_filtered_dataset_overlap(void)
          "Hyperslab selection succeeded");
 
     /* Fill data buffer */
-    data_size        = sel_dims[0] * sel_dims[1] * sel_dims[2] * sizeof(*data);
-    correct_buf_size = dataset_dims[0] * dataset_dims[1] * dataset_dims[2] * sizeof(*correct_buf);
+    data_size        = sel_dims[0] * sel_dims[1] * sizeof(*data);
+    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
 
     data = (C_DATATYPE *)HDcalloc(1, data_size);
     VRFY((NULL != data), "HDcalloc succeeded");
@@ -1928,30 +2201,25 @@ test_write_3d_filtered_dataset_overlap(void)
         data[i] = (C_DATATYPE)GEN_DATA(i);
 
     for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
-        /* Add the Column Index */
-        correct_buf[i] = (C_DATATYPE)(
-            (i % (hsize_t)(WRITE_SHARED_FILTERED_CHUNKS_3D_DEPTH * WRITE_SHARED_FILTERED_CHUNKS_3D_NCOLS))
-
-            /* Add the Row Index */
-            + ((i % (hsize_t)(mpi_size * WRITE_SHARED_FILTERED_CHUNKS_3D_DEPTH *
-                              WRITE_SHARED_FILTERED_CHUNKS_3D_NCOLS)) /
-               (hsize_t)(WRITE_SHARED_FILTERED_CHUNKS_3D_DEPTH * WRITE_SHARED_FILTERED_CHUNKS_3D_NCOLS))
-
-            /* Add the amount that gets added when a rank moves down to its next
-               section vertically in the dataset */
-            + ((hsize_t)(WRITE_SHARED_FILTERED_CHUNKS_3D_DEPTH * WRITE_SHARED_FILTERED_CHUNKS_3D_NCOLS) *
-               (i / (hsize_t)(mpi_size * WRITE_SHARED_FILTERED_CHUNKS_3D_DEPTH *
-                              WRITE_SHARED_FILTERED_CHUNKS_3D_NCOLS))));
+        /* Add Column Index */
+        correct_buf[i] =
+            (C_DATATYPE)((i % (hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_NCOLS)
 
-    /* Create property list for collective dataset write */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
+                         /* Add the Row Index */
+                         + ((i % (hsize_t)(mpi_size * INTERLEAVED_WRITE_FILTERED_DATASET_NCOLS)) /
+                            (hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_NCOLS)
 
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
+                         /* Add the amount that gets added when a rank moves down to its next section
+                            vertically in the dataset */
+                         + ((hsize_t)INTERLEAVED_WRITE_FILTERED_DATASET_NCOLS *
+                            (i / (hsize_t)(mpi_size * INTERLEAVED_WRITE_FILTERED_DATASET_NCOLS))));
 
-    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0),
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0),
          "Dataset write succeeded");
 
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
     if (data)
         HDfree(data);
 
@@ -1961,10 +2229,10 @@ test_write_3d_filtered_dataset_overlap(void)
     read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_NAME, H5P_DEFAULT);
+    dset_id = H5Dopen2(group_id, INTERLEAVED_WRITE_FILTERED_DATASET_NAME, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0),
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
          "Dataset read succeeded");
 
     VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
@@ -1974,116 +2242,106 @@ test_write_3d_filtered_dataset_overlap(void)
     if (read_buf)
         HDfree(read_buf);
 
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
 
 /*
- * Tests parallel write of filtered data to unshared
- * chunks using a compound datatype which doesn't
- * require a datatype conversion.
+ * Tests parallel write of transformed and filtered data
+ * in the case where only one process is writing to a
+ * particular chunk in the operation. Normally, a data
+ * transform function causes the parallel library to
+ * break to independent I/O, which isn't allowed when
+ * there are filters in the pipeline. However, in this
+ * case the parallel library recognizes that the data
+ * transform expression "x" is equivalent to applying
+ * no transform at all. Therefore it skips the data
+ * transform and, as a result, does not break to
+ * independent I/O.
  *
- * Programmer: Jordan Henderson
- *             02/10/2017
+ * Programmer: Jan-Willem Blokland
+ *             08/20/2021
  */
 static void
-test_write_cmpd_filtered_dataset_no_conversion_unshared(void)
+test_write_transformed_filtered_dataset_no_overlap(const char *parent_group, H5Z_filter_t filter_id,
+                                                   hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id)
 {
-    COMPOUND_C_DATATYPE *data        = NULL;
-    COMPOUND_C_DATATYPE *read_buf    = NULL;
-    COMPOUND_C_DATATYPE *correct_buf = NULL;
-    hsize_t              dataset_dims[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              chunk_dims[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              sel_dims[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              start[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              stride[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              count[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              block[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
-    size_t               i, correct_buf_size;
-    hid_t                file_id = -1, dset_id = -1, plist_id = -1, memtype = -1;
-    hid_t                filespace = -1, memspace = -1;
+    C_DATATYPE *data        = NULL;
+    C_DATATYPE *read_buf    = NULL;
+    C_DATATYPE *correct_buf = NULL;
+    hsize_t     dataset_dims[WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     chunk_dims[WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     sel_dims[WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     start[WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     stride[WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     count[WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     block[WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
+    size_t      i, data_size, correct_buf_size;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
-        HDputs("Testing write to unshared filtered chunks in Compound Datatype dataset without Datatype "
-               "conversion");
-
-    CHECK_CUR_FILTER_AVAIL();
-
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
+        HDputs("Testing write to unshared transformed and filtered chunks");
 
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
     /* Create the dataspace for the dataset */
-    dataset_dims[0] = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_NROWS;
-    dataset_dims[1] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_NCOLS;
-    chunk_dims[0]   = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NROWS;
-    chunk_dims[1]   = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NCOLS;
-    sel_dims[0]     = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NROWS;
-    sel_dims[1]     = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_ENTRIES_PER_PROC;
+    dataset_dims[0] = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NROWS;
+    dataset_dims[1] = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NCOLS;
+    chunk_dims[0]   = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS;
+    chunk_dims[1]   = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NCOLS;
+    sel_dims[0]     = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS;
+    sel_dims[1]     = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NCOLS;
 
-    filespace = H5Screate_simple(WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS,
-                                 dataset_dims, NULL);
+    filespace = H5Screate_simple(WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
     VRFY((filespace >= 0), "File dataspace creation succeeded");
 
-    memspace =
-        H5Screate_simple(WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS, sel_dims, NULL);
+    memspace = H5Screate_simple(WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS, sel_dims, NULL);
     VRFY((memspace >= 0), "Memory dataspace creation succeeded");
 
     /* Create chunked dataset */
-    plist_id = H5Pcreate(H5P_DATASET_CREATE);
-    VRFY((plist_id >= 0), "DCPL creation succeeded");
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-    VRFY((H5Pset_chunk(plist_id, WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS,
-                       chunk_dims) >= 0),
+    VRFY((H5Pset_chunk(plist_id, WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
          "Chunk size set");
 
     /* Add test filter to the pipeline */
-    VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
-
-    /* Create the compound type for memory. */
-    memtype = H5Tcreate(H5T_COMPOUND, sizeof(COMPOUND_C_DATATYPE));
-    VRFY((memtype >= 0), "Datatype creation succeeded");
-
-    VRFY((H5Tinsert(memtype, "ShortData", HOFFSET(COMPOUND_C_DATATYPE, field1), H5T_NATIVE_SHORT) >= 0),
-         "Datatype insertion succeeded");
-    VRFY((H5Tinsert(memtype, "IntData", HOFFSET(COMPOUND_C_DATATYPE, field2), H5T_NATIVE_INT) >= 0),
-         "Datatype insertion succeeded");
-    VRFY((H5Tinsert(memtype, "LongData", HOFFSET(COMPOUND_C_DATATYPE, field3), H5T_NATIVE_LONG) >= 0),
-         "Datatype insertion succeeded");
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-    dset_id = H5Dcreate2(file_id, WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_NAME, memtype,
-                         filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+    dset_id = H5Dcreate2(group_id, WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_NAME,
+                         HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset creation succeeded");
 
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
     VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
     /* Each process defines the dataset selection in memory and writes
      * it to the hyperslab in the file
      */
-    count[0]  = 1;
-    count[1]  = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_ENTRIES_PER_PROC;
-    stride[0] = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NROWS;
-    stride[1] = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NCOLS;
-    block[0]  = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NROWS;
-    block[1]  = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NCOLS;
-    start[0]  = 0;
-    start[1]  = ((hsize_t)mpi_rank * WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NCOLS);
+    count[0] = 1;
+    count[1] = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NCOLS /
+               (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NCOLS;
+    stride[0] = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS;
+    stride[1] = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NCOLS;
+    block[0]  = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS;
+    block[1]  = (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NCOLS;
+    start[0]  = ((hsize_t)mpi_rank * (hsize_t)WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS * count[0]);
+    start[1]  = 0;
 
     if (VERBOSE_MED) {
         HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
@@ -2100,37 +2358,32 @@ test_write_cmpd_filtered_dataset_no_conversion_unshared(void)
     VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
          "Hyperslab selection succeeded");
 
-    data = (COMPOUND_C_DATATYPE *)HDcalloc(
-        1, (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_ENTRIES_PER_PROC * sizeof(*data));
-    VRFY((NULL != data), "HDcalloc succeeded");
+    /* Fill data buffer */
+    data_size        = sel_dims[0] * sel_dims[1] * sizeof(*data);
+    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
 
-    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(COMPOUND_C_DATATYPE);
+    data = (C_DATATYPE *)HDcalloc(1, data_size);
+    VRFY((NULL != data), "HDcalloc succeeded");
 
-    correct_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != correct_buf), "HDcalloc succeeded");
 
-    /* Fill data buffer */
-    for (i = 0; i < (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_ENTRIES_PER_PROC; i++) {
-        data[i].field1 = (short)GEN_DATA(i);
-        data[i].field2 = (int)GEN_DATA(i);
-        data[i].field3 = (long)GEN_DATA(i);
-    }
-
-    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++) {
-        correct_buf[i].field1 = (short)((i % dataset_dims[1]) + (i / dataset_dims[1]));
-
-        correct_buf[i].field2 = (int)((i % dataset_dims[1]) + (i / dataset_dims[1]));
+    for (i = 0; i < data_size / sizeof(*data); i++)
+        data[i] = (C_DATATYPE)GEN_DATA(i);
 
-        correct_buf[i].field3 = (long)((i % dataset_dims[1]) + (i / dataset_dims[1]));
-    }
+    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
+        correct_buf[i] = (C_DATATYPE)((i % (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])) +
+                                      (i / (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])));
 
-    /* Create property list for collective dataset write */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
+    /* Create property list for data transform */
+    plist_id = H5Pcopy(dxpl_id);
+    VRFY((plist_id >= 0), "DXPL copy succeeded");
 
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
+    /* Set data transform expression */
+    VRFY((H5Pset_data_transform(plist_id, "x") >= 0), "Set data transform expression succeeded");
 
-    VRFY((H5Dwrite(dset_id, memtype, memspace, filespace, plist_id, data) >= 0), "Dataset write succeeded");
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0),
+         "Dataset write succeeded");
 
     if (data)
         HDfree(data);
@@ -2138,17 +2391,24 @@ test_write_cmpd_filtered_dataset_no_conversion_unshared(void)
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
     /* Verify the correct data was written */
-    read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_NAME,
-                       H5P_DEFAULT);
+    dset_id = H5Dopen2(group_id, WRITE_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    VRFY((H5Dread(dset_id, memtype, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0), "Dataset read succeeded");
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0),
+         "Dataset read succeeded");
 
     VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
 
+    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+
+    /* Verify space allocation status */
+    plist_id = H5Dget_create_plist(dset_id);
+    VRFY((plist_id >= 0), "H5Dget_create_plist succeeded");
+    verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
     if (correct_buf)
         HDfree(correct_buf);
     if (read_buf)
@@ -2157,120 +2417,110 @@ test_write_cmpd_filtered_dataset_no_conversion_unshared(void)
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Tclose(memtype) >= 0), "Datatype close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
 
 /*
- * Tests parallel write of filtered data to shared
- * chunks using a compound datatype which doesn't
- * require a datatype conversion.
+ * Tests parallel write of filtered data in the case where
+ * the dataset has 3 dimensions and each process writes
+ * to its own "page" in the 3rd dimension.
  *
  * Programmer: Jordan Henderson
- *             02/10/2017
+ *             02/06/2017
  */
 static void
-test_write_cmpd_filtered_dataset_no_conversion_shared(void)
+test_write_3d_filtered_dataset_no_overlap_separate_pages(const char *parent_group, H5Z_filter_t filter_id,
+                                                         hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id)
 {
-    COMPOUND_C_DATATYPE *data        = NULL;
-    COMPOUND_C_DATATYPE *read_buf    = NULL;
-    COMPOUND_C_DATATYPE *correct_buf = NULL;
-    hsize_t              dataset_dims[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              chunk_dims[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              sel_dims[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              start[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              stride[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              count[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              block[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
-    size_t               i, correct_buf_size;
-    hid_t                file_id, dset_id, plist_id, memtype;
-    hid_t                filespace, memspace;
+    C_DATATYPE *data        = NULL;
+    C_DATATYPE *read_buf    = NULL;
+    C_DATATYPE *correct_buf = NULL;
+    hsize_t     dataset_dims[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
+    hsize_t     chunk_dims[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
+    hsize_t     sel_dims[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
+    hsize_t     start[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
+    hsize_t     stride[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
+    hsize_t     count[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
+    hsize_t     block[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
+    size_t      i, data_size, correct_buf_size;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
-        HDputs("Testing write to shared filtered chunks in Compound Datatype dataset without Datatype "
-               "conversion");
-
-    CHECK_CUR_FILTER_AVAIL();
-
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
+        HDputs("Testing write to unshared filtered chunks on separate pages in 3D dataset");
 
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
     /* Create the dataspace for the dataset */
-    dataset_dims[0] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_NROWS;
-    dataset_dims[1] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_NCOLS;
-    chunk_dims[0]   = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NROWS;
-    chunk_dims[1]   = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NCOLS;
-    sel_dims[0] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NROWS / (hsize_t)mpi_size;
-    sel_dims[1] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_ENTRIES_PER_PROC;
+    dataset_dims[0] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NROWS;
+    dataset_dims[1] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NCOLS;
+    dataset_dims[2] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DEPTH;
+    chunk_dims[0]   = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NROWS;
+    chunk_dims[1]   = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NCOLS;
+    chunk_dims[2]   = 1;
+    sel_dims[0]     = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NROWS;
+    sel_dims[1]     = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NCOLS;
+    sel_dims[2]     = 1;
 
-    filespace = H5Screate_simple(WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS,
-                                 dataset_dims, NULL);
+    filespace = H5Screate_simple(WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS, dataset_dims, NULL);
     VRFY((filespace >= 0), "File dataspace creation succeeded");
 
-    memspace =
-        H5Screate_simple(WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS, sel_dims, NULL);
+    memspace = H5Screate_simple(WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS, sel_dims, NULL);
     VRFY((memspace >= 0), "Memory dataspace creation succeeded");
 
     /* Create chunked dataset */
-    plist_id = H5Pcreate(H5P_DATASET_CREATE);
-    VRFY((plist_id >= 0), "DCPL creation succeeded");
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-    VRFY((H5Pset_chunk(plist_id, WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS,
-                       chunk_dims) >= 0),
+    VRFY((H5Pset_chunk(plist_id, WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS, chunk_dims) >= 0),
          "Chunk size set");
 
     /* Add test filter to the pipeline */
-    VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
-
-    /* Create the compound type for memory. */
-    memtype = H5Tcreate(H5T_COMPOUND, sizeof(COMPOUND_C_DATATYPE));
-    VRFY((memtype >= 0), "Datatype creation succeeded");
-
-    VRFY((H5Tinsert(memtype, "ShortData", HOFFSET(COMPOUND_C_DATATYPE, field1), H5T_NATIVE_SHORT) >= 0),
-         "Datatype insertion succeeded");
-    VRFY((H5Tinsert(memtype, "IntData", HOFFSET(COMPOUND_C_DATATYPE, field2), H5T_NATIVE_INT) >= 0),
-         "Datatype insertion succeeded");
-    VRFY((H5Tinsert(memtype, "LongData", HOFFSET(COMPOUND_C_DATATYPE, field3), H5T_NATIVE_LONG) >= 0),
-         "Datatype insertion succeeded");
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-    dset_id = H5Dcreate2(file_id, WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_NAME, memtype,
-                         filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+    dset_id = H5Dcreate2(group_id, WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_NAME,
+                         HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
     /* Each process defines the dataset selection in memory and writes
      * it to the hyperslab in the file
      */
-    count[0]  = 1;
-    count[1]  = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_ENTRIES_PER_PROC;
-    stride[0] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NROWS;
-    stride[1] = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NCOLS;
-    block[0]  = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NROWS / (hsize_t)mpi_size;
-    block[1]  = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NCOLS;
-    start[0]  = (hsize_t)mpi_rank;
-    start[1]  = 0;
-
+    count[0] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NROWS /
+               (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NROWS;
+    count[1] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NCOLS /
+               (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NCOLS;
+    count[2]  = 1;
+    stride[0] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NROWS;
+    stride[1] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NCOLS;
+    stride[2] = 1;
+    block[0]  = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NROWS;
+    block[1]  = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NCOLS;
+    block[2]  = 1;
+    start[0]  = 0;
+    start[1]  = 0;
+    start[2]  = (hsize_t)mpi_rank;
+
     if (VERBOSE_MED) {
-        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
-                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
-                 ", %" PRIuHSIZE " ]\n",
-                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
+        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE
+                 " ], stride[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE
+                 " ]\n",
+                 mpi_rank, count[0], count[1], count[2], stride[0], stride[1], stride[2], start[0], start[1],
+                 start[2], block[0], block[1], block[2]);
         HDfflush(stdout);
     }
 
@@ -2281,43 +2531,27 @@ test_write_cmpd_filtered_dataset_no_conversion_shared(void)
     VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
          "Hyperslab selection succeeded");
 
-    data = (COMPOUND_C_DATATYPE *)HDcalloc(
-        1, (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_ENTRIES_PER_PROC * sizeof(*data));
-    VRFY((NULL != data), "HDcalloc succeeded");
-
-    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(COMPOUND_C_DATATYPE);
-
-    correct_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != correct_buf), "HDcalloc succeeded");
-
     /* Fill data buffer */
-    for (i = 0; i < (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_ENTRIES_PER_PROC; i++) {
-        data[i].field1 = (short)GEN_DATA(i);
-        data[i].field2 = (int)GEN_DATA(i);
-        data[i].field3 = (long)GEN_DATA(i);
-    }
+    data_size        = sel_dims[0] * sel_dims[1] * sel_dims[2] * sizeof(*data);
+    correct_buf_size = dataset_dims[0] * dataset_dims[1] * dataset_dims[2] * sizeof(*correct_buf);
 
-    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++) {
-        correct_buf[i].field1 =
-            (short)((dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
-                    (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
+    data = (C_DATATYPE *)HDcalloc(1, data_size);
+    VRFY((NULL != data), "HDcalloc succeeded");
 
-        correct_buf[i].field2 =
-            (int)((dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
-                  (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
+    correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != correct_buf), "HDcalloc succeeded");
 
-        correct_buf[i].field3 =
-            (long)((dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
-                   (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
-    }
+    for (i = 0; i < data_size / sizeof(*data); i++)
+        data[i] = (C_DATATYPE)GEN_DATA(i);
 
-    /* Create property list for collective dataset write */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
+    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
+        correct_buf[i] = (C_DATATYPE)((i % (hsize_t)mpi_size) + (i / (hsize_t)mpi_size));
 
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0),
+         "Dataset write succeeded");
 
-    VRFY((H5Dwrite(dset_id, memtype, memspace, filespace, plist_id, data) >= 0), "Dataset write succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
 
     if (data)
         HDfree(data);
@@ -2325,14 +2559,14 @@ test_write_cmpd_filtered_dataset_no_conversion_shared(void)
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
     /* Verify the correct data was written */
-    read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    dset_id =
-        H5Dopen2(file_id, "/" WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_NAME, H5P_DEFAULT);
+    dset_id = H5Dopen2(group_id, WRITE_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_NAME, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    VRFY((H5Dread(dset_id, memtype, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0), "Dataset read succeeded");
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
+         "Dataset read succeeded");
 
     VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
 
@@ -2341,136 +2575,114 @@ test_write_cmpd_filtered_dataset_no_conversion_shared(void)
     if (read_buf)
         HDfree(read_buf);
 
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Tclose(memtype) >= 0), "Datatype close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
 
 /*
- * Tests parallel write of filtered data to unshared
- * chunks using a compound datatype which requires a
- * datatype conversion.
- *
- * NOTE: This test currently should fail because the
- * datatype conversion causes the parallel library to
- * break to independent I/O and this isn't allowed when
- * there are filters in the pipeline.
+ * Tests parallel write of filtered data in the case where
+ * the dataset has 3 dimensions and each process writes
+ * to each "page" in the 3rd dimension. However, no chunk
+ * on a given "page" is written to by more than one process.
  *
  * Programmer: Jordan Henderson
- *             02/07/2017
+ *             02/06/2017
  */
 static void
-test_write_cmpd_filtered_dataset_type_conversion_unshared(void)
+test_write_3d_filtered_dataset_no_overlap_same_pages(const char *parent_group, H5Z_filter_t filter_id,
+                                                     hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id)
 {
-    COMPOUND_C_DATATYPE *data        = NULL;
-    COMPOUND_C_DATATYPE *read_buf    = NULL;
-    COMPOUND_C_DATATYPE *correct_buf = NULL;
-    hsize_t              dataset_dims[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              chunk_dims[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              sel_dims[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              start[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              stride[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              count[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              block[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
-    size_t               i, correct_buf_size;
-    hid_t                file_id = -1, dset_id = -1, plist_id = -1, filetype = -1, memtype = -1;
-    hid_t                filespace = -1, memspace = -1;
+    C_DATATYPE *data        = NULL;
+    C_DATATYPE *read_buf    = NULL;
+    C_DATATYPE *correct_buf = NULL;
+    hsize_t     dataset_dims[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
+    hsize_t     chunk_dims[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
+    hsize_t     sel_dims[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
+    hsize_t     start[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
+    hsize_t     stride[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
+    hsize_t     count[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
+    hsize_t     block[WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
+    size_t      i, data_size, correct_buf_size;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
-        HDputs("Testing write to unshared filtered chunks in Compound Datatype dataset with Datatype "
-               "conversion");
-
-    CHECK_CUR_FILTER_AVAIL();
-
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
+        HDputs("Testing write to unshared filtered chunks on the same pages in 3D dataset");
 
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
     /* Create the dataspace for the dataset */
-    dataset_dims[0] = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_NROWS;
-    dataset_dims[1] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_NCOLS;
-    chunk_dims[0]   = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NROWS;
-    chunk_dims[1]   = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NCOLS;
-    sel_dims[0]     = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NROWS;
-    sel_dims[1]     = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_ENTRIES_PER_PROC;
+    dataset_dims[0] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_NROWS;
+    dataset_dims[1] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_NCOLS;
+    dataset_dims[2] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DEPTH;
+    chunk_dims[0]   = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NROWS;
+    chunk_dims[1]   = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NCOLS;
+    chunk_dims[2]   = 1;
+    sel_dims[0]     = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NROWS;
+    sel_dims[1]     = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_NCOLS;
+    sel_dims[2]     = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DEPTH;
 
-    filespace = H5Screate_simple(WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS,
-                                 dataset_dims, NULL);
+    filespace =
+        H5Screate_simple(WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS, dataset_dims, NULL);
     VRFY((filespace >= 0), "File dataspace creation succeeded");
 
-    memspace = H5Screate_simple(WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS,
-                                sel_dims, NULL);
+    memspace = H5Screate_simple(WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS, sel_dims, NULL);
     VRFY((memspace >= 0), "Memory dataspace creation succeeded");
 
     /* Create chunked dataset */
-    plist_id = H5Pcreate(H5P_DATASET_CREATE);
-    VRFY((plist_id >= 0), "DCPL creation succeeded");
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-    VRFY((H5Pset_chunk(plist_id, WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS,
-                       chunk_dims) >= 0),
+    VRFY((H5Pset_chunk(plist_id, WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS, chunk_dims) >= 0),
          "Chunk size set");
 
     /* Add test filter to the pipeline */
-    VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
-
-    /* Create the compound type for memory. */
-    memtype = H5Tcreate(H5T_COMPOUND, sizeof(COMPOUND_C_DATATYPE));
-    VRFY((memtype >= 0), "Datatype creation succeeded");
-
-    VRFY((H5Tinsert(memtype, "ShortData", HOFFSET(COMPOUND_C_DATATYPE, field1), H5T_NATIVE_SHORT) >= 0),
-         "Datatype insertion succeeded");
-    VRFY((H5Tinsert(memtype, "IntData", HOFFSET(COMPOUND_C_DATATYPE, field2), H5T_NATIVE_INT) >= 0),
-         "Datatype insertion succeeded");
-    VRFY((H5Tinsert(memtype, "LongData", HOFFSET(COMPOUND_C_DATATYPE, field3), H5T_NATIVE_LONG) >= 0),
-         "Datatype insertion succeeded");
-
-    /* Create the compound type for file. */
-    filetype = H5Tcreate(H5T_COMPOUND, 32);
-    VRFY((filetype >= 0), "Datatype creation succeeded");
-
-    VRFY((H5Tinsert(filetype, "ShortData", 0, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
-    VRFY((H5Tinsert(filetype, "IntData", 8, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
-    VRFY((H5Tinsert(filetype, "LongData", 16, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-    dset_id = H5Dcreate2(file_id, WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_NAME,
-                         filetype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+    dset_id = H5Dcreate2(group_id, WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_NAME,
+                         HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
     /* Each process defines the dataset selection in memory and writes
      * it to the hyperslab in the file
      */
-    count[0]  = 1;
-    count[1]  = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_ENTRIES_PER_PROC;
-    stride[0] = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NROWS;
-    stride[1] = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NCOLS;
-    block[0]  = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NROWS;
-    block[1]  = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NCOLS;
-    start[0]  = 0;
-    start[1]  = ((hsize_t)mpi_rank * WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NCOLS);
+    count[0] = 1;
+    count[1] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_NCOLS /
+               (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NCOLS;
+    count[2]  = (hsize_t)mpi_size;
+    stride[0] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NROWS;
+    stride[1] = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NCOLS;
+    stride[2] = 1;
+    block[0]  = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NROWS;
+    block[1]  = (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NCOLS;
+    block[2]  = 1;
+    start[0] = ((hsize_t)mpi_rank * (hsize_t)WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NROWS * count[0]);
+    start[1] = 0;
+    start[2] = 0;
 
     if (VERBOSE_MED) {
-        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
-                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
-                 ", %" PRIuHSIZE " ]\n",
-                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
+        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE
+                 " ], stride[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE
+                 " ]\n",
+                 mpi_rank, count[0], count[1], count[2], stride[0], stride[1], stride[2], start[0], start[1],
+                 start[2], block[0], block[1], block[2]);
         HDfflush(stdout);
     }
 
@@ -2481,50 +2693,43 @@ test_write_cmpd_filtered_dataset_type_conversion_unshared(void)
     VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
          "Hyperslab selection succeeded");
 
-    data = (COMPOUND_C_DATATYPE *)HDcalloc(
-        1, (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_ENTRIES_PER_PROC * sizeof(*data));
-    VRFY((NULL != data), "HDcalloc succeeded");
+    /* Fill data buffer */
+    data_size        = sel_dims[0] * sel_dims[1] * sel_dims[2] * sizeof(*data);
+    correct_buf_size = dataset_dims[0] * dataset_dims[1] * dataset_dims[2] * sizeof(*correct_buf);
 
-    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(COMPOUND_C_DATATYPE);
+    data = (C_DATATYPE *)HDcalloc(1, data_size);
+    VRFY((NULL != data), "HDcalloc succeeded");
 
-    correct_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != correct_buf), "HDcalloc succeeded");
 
-    /* Fill data buffer */
-    for (i = 0; i < (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_ENTRIES_PER_PROC; i++) {
-        data[i].field1 = (short)GEN_DATA(i);
-        data[i].field2 = (int)GEN_DATA(i);
-        data[i].field3 = (long)GEN_DATA(i);
-    }
+    for (i = 0; i < data_size / sizeof(*data); i++)
+        data[i] = (C_DATATYPE)GEN_DATA(i);
 
-    /* Create property list for collective dataset write */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
+    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
+        correct_buf[i] = (C_DATATYPE)((i % (dataset_dims[0] * dataset_dims[1])) +
+                                      (i / (dataset_dims[0] * dataset_dims[1])));
 
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0),
+         "Dataset write succeeded");
 
-    /* Ensure that this test currently fails since type conversions break collective mode */
-    H5E_BEGIN_TRY
-    {
-        VRFY((H5Dwrite(dset_id, memtype, memspace, filespace, plist_id, data) < 0),
-             "Dataset write succeeded");
-    }
-    H5E_END_TRY;
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
 
     if (data)
         HDfree(data);
 
-    /* Verify that no data was written */
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
-    read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    /* Verify the correct data was written */
+    read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_NAME,
-                       H5P_DEFAULT);
+    dset_id = H5Dopen2(group_id, WRITE_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_NAME, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    VRFY((H5Dread(dset_id, memtype, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0), "Dataset read succeeded");
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
+         "Dataset read succeeded");
 
     VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
 
@@ -2533,93 +2738,260 @@ test_write_cmpd_filtered_dataset_type_conversion_unshared(void)
     if (read_buf)
         HDfree(read_buf);
 
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Tclose(filetype) >= 0), "File datatype close succeeded");
-    VRFY((H5Tclose(memtype) >= 0), "Memory datatype close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
 
 /*
- * Tests parallel write of filtered data to shared
- * chunks using a compound datatype which requires
- * a datatype conversion.
- *
- * NOTE: This test currently should fail because the
- * datatype conversion causes the parallel library to
- * break to independent I/O and this isn't allowed when
- * there are filters in the pipeline.
+ * Tests parallel write of filtered data to a 3-dimensional dataset in
+ * which each process writes to each "page" in the 3rd dimension and each
+ * chunk in each "page" is written to equally by all processes. The dataset
+ * is created in parent_group from a copy of dcpl_id with filter_id added.
  *
  * Programmer: Jordan Henderson
- *             02/10/2017
+ *             02/06/2017
  */
 static void
-test_write_cmpd_filtered_dataset_type_conversion_shared(void)
+test_write_3d_filtered_dataset_overlap(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                       hid_t dcpl_id, hid_t dxpl_id)
 {
-    COMPOUND_C_DATATYPE *data        = NULL;
-    COMPOUND_C_DATATYPE *read_buf    = NULL;
-    COMPOUND_C_DATATYPE *correct_buf = NULL;
-    hsize_t              dataset_dims[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              chunk_dims[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              sel_dims[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              start[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              stride[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              count[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              block[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
-    size_t               i, correct_buf_size;
-    hid_t                file_id, dset_id, plist_id, filetype, memtype;
-    hid_t                filespace, memspace;
-
-    if (MAINPROCESS)
-        HDputs(
-            "Testing write to shared filtered chunks in Compound Datatype dataset with Datatype conversion");
-
-    CHECK_CUR_FILTER_AVAIL();
-
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
+    C_DATATYPE *data        = NULL;
+    C_DATATYPE *read_buf    = NULL;
+    C_DATATYPE *correct_buf = NULL;
+    hsize_t     dataset_dims[WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
+    hsize_t     chunk_dims[WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
+    hsize_t     sel_dims[WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
+    hsize_t     start[WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
+    hsize_t     stride[WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
+    hsize_t     count[WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
+    hsize_t     block[WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
+    size_t      i, data_size, correct_buf_size;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
+
+    if (MAINPROCESS)
+        HDputs("Testing write to shared filtered chunks in 3D dataset");
 
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
+    VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
+    /* Set up the dataset, chunk and per-process selection dimensions, then create the dataspaces */
+    dataset_dims[0] = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_NROWS;
+    dataset_dims[1] = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_NCOLS;
+    dataset_dims[2] = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_DEPTH;
+    chunk_dims[0]   = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_CH_NROWS;
+    chunk_dims[1]   = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_CH_NCOLS;
+    chunk_dims[2]   = 1; /* each chunk spans a single "page" of the 3rd dimension */
+    sel_dims[0]     = (hsize_t)(WRITE_SHARED_FILTERED_CHUNKS_3D_NROWS / mpi_size);
+    sel_dims[1]     = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_NCOLS;
+    sel_dims[2]     = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_DEPTH;
+
+    filespace = H5Screate_simple(WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS, dataset_dims, NULL);
+    VRFY((filespace >= 0), "File dataspace creation succeeded");
+
+    memspace = H5Screate_simple(WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS, sel_dims, NULL);
+    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
+
+    /* Create chunked dataset using a private copy of the DCPL passed in as dcpl_id */
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
+
+    VRFY((H5Pset_chunk(plist_id, WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS, chunk_dims) >= 0),
+         "Chunk size set");
+
+    /* Add test filter to the pipeline */
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
+
+    dset_id = H5Dcreate2(group_id, WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_NAME, HDF5_DATATYPE_NAME,
+                         filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset creation succeeded");
+
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+
+    /* Each process selects one row (block[0] = 1, starting at row mpi_rank) out of
+     * every stride[0] rows, in chunk-wide column blocks, on every page of the 3rd dimension
+     */
+    count[0]  = (hsize_t)(WRITE_SHARED_FILTERED_CHUNKS_3D_NROWS / WRITE_SHARED_FILTERED_CHUNKS_3D_CH_NROWS);
+    count[1]  = (hsize_t)(WRITE_SHARED_FILTERED_CHUNKS_3D_NCOLS / WRITE_SHARED_FILTERED_CHUNKS_3D_CH_NCOLS);
+    count[2]  = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_DEPTH;
+    stride[0] = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_CH_NROWS;
+    stride[1] = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_CH_NCOLS;
+    stride[2] = 1;
+    block[0]  = 1;
+    block[1]  = (hsize_t)WRITE_SHARED_FILTERED_CHUNKS_3D_CH_NCOLS;
+    block[2]  = 1;
+    start[0]  = (hsize_t)mpi_rank;
+    start[1]  = 0;
+    start[2]  = 0;
+
+    if (VERBOSE_MED) {
+        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE
+                 " ], stride[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE
+                 " ]\n",
+                 mpi_rank, count[0], count[1], count[2], stride[0], stride[1], stride[2], start[0], start[1],
+                 start[2], block[0], block[1], block[2]);
+        HDfflush(stdout);
+    }
+
+    /* Re-acquire the file dataspace from the dataset and select this rank's hyperslab in it */
+    filespace = H5Dget_space(dset_id);
+    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+
+    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+         "Hyperslab selection succeeded");
+
+    /* Size and allocate the write buffer and the expected-result buffer, then fill both */
+    data_size        = sel_dims[0] * sel_dims[1] * sel_dims[2] * sizeof(*data);
+    correct_buf_size = dataset_dims[0] * dataset_dims[1] * dataset_dims[2] * sizeof(*correct_buf);
+
+    data = (C_DATATYPE *)HDcalloc(1, data_size);
+    VRFY((NULL != data), "HDcalloc succeeded");
+
+    correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != correct_buf), "HDcalloc succeeded");
+
+    for (i = 0; i < data_size / sizeof(*data); i++)
+        data[i] = (C_DATATYPE)GEN_DATA(i);
+
+    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
+        /* Add the Column Index */
+        correct_buf[i] = (C_DATATYPE)(
+            (i % (hsize_t)(WRITE_SHARED_FILTERED_CHUNKS_3D_DEPTH * WRITE_SHARED_FILTERED_CHUNKS_3D_NCOLS))
+
+            /* Add the Row Index */
+            + ((i % (hsize_t)(mpi_size * WRITE_SHARED_FILTERED_CHUNKS_3D_DEPTH *
+                              WRITE_SHARED_FILTERED_CHUNKS_3D_NCOLS)) /
+               (hsize_t)(WRITE_SHARED_FILTERED_CHUNKS_3D_DEPTH * WRITE_SHARED_FILTERED_CHUNKS_3D_NCOLS))
+
+            /* Add the amount that gets added when a rank moves down to its next
+               section vertically in the dataset */
+            + ((hsize_t)(WRITE_SHARED_FILTERED_CHUNKS_3D_DEPTH * WRITE_SHARED_FILTERED_CHUNKS_3D_NCOLS) *
+               (i / (hsize_t)(mpi_size * WRITE_SHARED_FILTERED_CHUNKS_3D_DEPTH *
+                              WRITE_SHARED_FILTERED_CHUNKS_3D_NCOLS))));
+
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0),
+         "Dataset write succeeded");
+
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
+    if (data)
+        HDfree(data);
+
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+
+    /* Verify the correct data was written by re-opening the dataset and reading it back in full */
+    read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != read_buf), "HDcalloc succeeded");
+
+    dset_id = H5Dopen2(group_id, WRITE_SHARED_FILTERED_CHUNKS_3D_DATASET_NAME, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
+
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
+         "Dataset read succeeded");
+
+    VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+
+    if (correct_buf)
+        HDfree(correct_buf);
+    if (read_buf)
+        HDfree(read_buf);
+
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+    VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
+    VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+
+    return;
+}
+
+/*
+ * Tests parallel write of filtered data to unshared
+ * chunks using a compound datatype which doesn't
+ * require a datatype conversion.
+ *
+ * Programmer: Jordan Henderson
+ *             02/10/2017
+ */
+static void
+test_write_cmpd_filtered_dataset_no_conversion_unshared(const char *parent_group, H5Z_filter_t filter_id,
+                                                        hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id)
+{
+    COMPOUND_C_DATATYPE *data        = NULL;
+    COMPOUND_C_DATATYPE *read_buf    = NULL;
+    COMPOUND_C_DATATYPE *correct_buf = NULL;
+    hsize_t              dataset_dims[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              chunk_dims[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              sel_dims[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              start[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              stride[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              count[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              block[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
+    size_t               i, correct_buf_size;
+    hid_t                file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID,
+          memtype   = H5I_INVALID_HID;
+    hid_t group_id  = H5I_INVALID_HID;
+    hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
+
+    if (MAINPROCESS)
+        HDputs("Testing write to unshared filtered chunks in Compound Datatype dataset without Datatype "
+               "conversion");
+
+    /* SZIP and ScaleOffset filters don't support compound types */
+    if (filter_id == H5Z_FILTER_SZIP || filter_id == H5Z_FILTER_SCALEOFFSET) {
+        if (MAINPROCESS)
+            SKIPPED();
+        return;
+    }
 
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
     /* Create the dataspace for the dataset */
-    dataset_dims[0] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_NROWS;
-    dataset_dims[1] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_NCOLS;
-    chunk_dims[0]   = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NROWS;
-    chunk_dims[1]   = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NCOLS;
-    sel_dims[0] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NROWS / (hsize_t)mpi_size;
-    sel_dims[1] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_ENTRIES_PER_PROC;
+    dataset_dims[0] = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_NROWS;
+    dataset_dims[1] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_NCOLS;
+    chunk_dims[0]   = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NROWS;
+    chunk_dims[1]   = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NCOLS;
+    sel_dims[0]     = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NROWS;
+    sel_dims[1]     = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_ENTRIES_PER_PROC;
 
-    filespace = H5Screate_simple(WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS,
+    filespace = H5Screate_simple(WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS,
                                  dataset_dims, NULL);
     VRFY((filespace >= 0), "File dataspace creation succeeded");
 
     memspace =
-        H5Screate_simple(WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS, sel_dims, NULL);
+        H5Screate_simple(WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS, sel_dims, NULL);
     VRFY((memspace >= 0), "Memory dataspace creation succeeded");
 
     /* Create chunked dataset */
-    plist_id = H5Pcreate(H5P_DATASET_CREATE);
-    VRFY((plist_id >= 0), "DCPL creation succeeded");
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-    VRFY((H5Pset_chunk(plist_id, WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS,
+    VRFY((H5Pset_chunk(plist_id, WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS,
                        chunk_dims) >= 0),
          "Chunk size set");
 
     /* Add test filter to the pipeline */
-    VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
     /* Create the compound type for memory. */
     memtype = H5Tcreate(H5T_COMPOUND, sizeof(COMPOUND_C_DATATYPE));
@@ -2632,32 +3004,26 @@ test_write_cmpd_filtered_dataset_type_conversion_shared(void)
     VRFY((H5Tinsert(memtype, "LongData", HOFFSET(COMPOUND_C_DATATYPE, field3), H5T_NATIVE_LONG) >= 0),
          "Datatype insertion succeeded");
 
-    /* Create the compound type for file. */
-    filetype = H5Tcreate(H5T_COMPOUND, 32);
-    VRFY((filetype >= 0), "Datatype creation succeeded");
-
-    VRFY((H5Tinsert(filetype, "ShortData", 0, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
-    VRFY((H5Tinsert(filetype, "IntData", 8, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
-    VRFY((H5Tinsert(filetype, "LongData", 16, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
-
-    dset_id = H5Dcreate2(file_id, WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_NAME,
-                         filetype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+    dset_id = H5Dcreate2(group_id, WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_NAME,
+                         memtype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
     /* Each process defines the dataset selection in memory and writes
      * it to the hyperslab in the file
      */
     count[0]  = 1;
-    count[1]  = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_ENTRIES_PER_PROC;
-    stride[0] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NROWS;
-    stride[1] = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NCOLS;
-    block[0]  = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NROWS / (hsize_t)mpi_size;
-    block[1]  = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NCOLS;
-    start[0]  = (hsize_t)mpi_rank;
-    start[1]  = 0;
+    count[1]  = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_ENTRIES_PER_PROC;
+    stride[0] = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NROWS;
+    stride[1] = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NCOLS;
+    block[0]  = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NROWS;
+    block[1]  = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NCOLS;
+    start[0]  = 0;
+    start[1]  = ((hsize_t)mpi_rank * WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NCOLS);
 
     if (VERBOSE_MED) {
         HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
@@ -2675,7 +3041,7 @@ test_write_cmpd_filtered_dataset_type_conversion_shared(void)
          "Hyperslab selection succeeded");
 
     data = (COMPOUND_C_DATATYPE *)HDcalloc(
-        1, (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_ENTRIES_PER_PROC * sizeof(*data));
+        1, (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_ENTRIES_PER_PROC * sizeof(*data));
     VRFY((NULL != data), "HDcalloc succeeded");
 
     correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(COMPOUND_C_DATATYPE);
@@ -2684,40 +3050,39 @@ test_write_cmpd_filtered_dataset_type_conversion_shared(void)
     VRFY((NULL != correct_buf), "HDcalloc succeeded");
 
     /* Fill data buffer */
-    for (i = 0; i < (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_ENTRIES_PER_PROC; i++) {
+    for (i = 0; i < (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_ENTRIES_PER_PROC; i++) {
         data[i].field1 = (short)GEN_DATA(i);
         data[i].field2 = (int)GEN_DATA(i);
         data[i].field3 = (long)GEN_DATA(i);
     }
 
-    /* Create property list for collective dataset write */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
+    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++) {
+        correct_buf[i].field1 = (short)((i % dataset_dims[1]) + (i / dataset_dims[1]));
 
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
+        correct_buf[i].field2 = (int)((i % dataset_dims[1]) + (i / dataset_dims[1]));
 
-    /* Ensure that this test currently fails since type conversions break collective mode */
-    H5E_BEGIN_TRY
-    {
-        VRFY((H5Dwrite(dset_id, memtype, memspace, filespace, plist_id, data) < 0),
-             "Dataset write succeeded");
+        correct_buf[i].field3 = (long)((i % dataset_dims[1]) + (i / dataset_dims[1]));
     }
-    H5E_END_TRY;
+
+    VRFY((H5Dwrite(dset_id, memtype, memspace, filespace, dxpl_id, data) >= 0), "Dataset write succeeded");
+
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
 
     if (data)
         HDfree(data);
 
-    /* Verify that no data was written */
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
+    /* Verify the correct data was written */
     read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_NAME,
-                       H5P_DEFAULT);
+    dset_id =
+        H5Dopen2(group_id, WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_NAME, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    VRFY((H5Dread(dset_id, memtype, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0), "Dataset read succeeded");
+    VRFY((H5Dread(dset_id, memtype, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), "Dataset read succeeded");
 
     VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
 
@@ -2726,689 +3091,642 @@ test_write_cmpd_filtered_dataset_type_conversion_shared(void)
     if (read_buf)
         HDfree(read_buf);
 
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Tclose(filetype) >= 0), "File datatype close succeeded");
-    VRFY((H5Tclose(memtype) >= 0), "Memory datatype close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Tclose(memtype) >= 0), "Datatype close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
-#endif
 
 /*
- * Tests parallel read of filtered data in the special
- * case where a dataset is composed of a single chunk.
- *
- * The MAINPROCESS rank will first write out all of the
- * data to the dataset. Then, each rank reads a part of
- * the singular chunk and contributes its piece to a
- * global buffer that is checked for consistency.
+ * Tests parallel write of filtered data to shared
+ * chunks using a compound datatype which doesn't
+ * require a datatype conversion.
  *
  * Programmer: Jordan Henderson
- *             05/14/2018
+ *             02/10/2017
  */
 static void
-test_read_one_chunk_filtered_dataset(void)
+test_write_cmpd_filtered_dataset_no_conversion_shared(const char *parent_group, H5Z_filter_t filter_id,
+                                                      hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id)
 {
-    C_DATATYPE *read_buf    = NULL;
-    C_DATATYPE *correct_buf = NULL;
-    C_DATATYPE *global_buf  = NULL;
-    hsize_t     dataset_dims[READ_ONE_CHUNK_FILTERED_DATASET_DIMS];
-    hsize_t     chunk_dims[READ_ONE_CHUNK_FILTERED_DATASET_DIMS];
-    hsize_t     sel_dims[READ_ONE_CHUNK_FILTERED_DATASET_DIMS];
-    hsize_t     start[READ_ONE_CHUNK_FILTERED_DATASET_DIMS];
-    hsize_t     stride[READ_ONE_CHUNK_FILTERED_DATASET_DIMS];
-    hsize_t     count[READ_ONE_CHUNK_FILTERED_DATASET_DIMS];
-    hsize_t     block[READ_ONE_CHUNK_FILTERED_DATASET_DIMS];
-    hsize_t     flat_dims[1];
-    size_t      i, read_buf_size, correct_buf_size;
-    hid_t       file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t       filespace = -1, memspace = -1;
-    int *       recvcounts = NULL;
-    int *       displs     = NULL;
+    COMPOUND_C_DATATYPE *data        = NULL;
+    COMPOUND_C_DATATYPE *read_buf    = NULL;
+    COMPOUND_C_DATATYPE *correct_buf = NULL;
+    hsize_t              dataset_dims[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              chunk_dims[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              sel_dims[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              start[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              stride[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              count[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              block[WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
+    size_t               i, correct_buf_size;
+    hid_t                file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID,
+          memtype   = H5I_INVALID_HID;
+    hid_t group_id  = H5I_INVALID_HID;
+    hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
-        HDputs("Testing read from one-chunk filtered dataset");
-
-    CHECK_CUR_FILTER_AVAIL();
-
-    dataset_dims[0] = (hsize_t)READ_ONE_CHUNK_FILTERED_DATASET_NROWS;
-    dataset_dims[1] = (hsize_t)READ_ONE_CHUNK_FILTERED_DATASET_NCOLS;
-
-    /* Setup the buffer for writing and for comparison */
-    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
-
-    correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != correct_buf), "HDcalloc succeeded");
-
-    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
-        correct_buf[i] = ((C_DATATYPE)i % (READ_ONE_CHUNK_FILTERED_DATASET_CH_NROWS / mpi_size *
-                                           READ_ONE_CHUNK_FILTERED_DATASET_CH_NCOLS)) +
-                         ((C_DATATYPE)i / (READ_ONE_CHUNK_FILTERED_DATASET_CH_NROWS / mpi_size *
-                                           READ_ONE_CHUNK_FILTERED_DATASET_CH_NCOLS));
-
-    if (MAINPROCESS) {
-        plist_id = H5Pcreate(H5P_FILE_ACCESS);
-        VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-        VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-             "Set libver bounds succeeded");
-
-        file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
-        VRFY((file_id >= 0), "Test file open succeeded");
-
-        VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
-
-        /* Create the dataspace for the dataset */
-        filespace = H5Screate_simple(READ_ONE_CHUNK_FILTERED_DATASET_DIMS, dataset_dims, NULL);
-        VRFY((filespace >= 0), "File dataspace creation succeeded");
-
-        /* Create chunked dataset */
-        chunk_dims[0] = (hsize_t)READ_ONE_CHUNK_FILTERED_DATASET_CH_NROWS;
-        chunk_dims[1] = (hsize_t)READ_ONE_CHUNK_FILTERED_DATASET_CH_NCOLS;
-
-        plist_id = H5Pcreate(H5P_DATASET_CREATE);
-        VRFY((plist_id >= 0), "DCPL creation succeeded");
-
-        VRFY((H5Pset_chunk(plist_id, READ_ONE_CHUNK_FILTERED_DATASET_DIMS, chunk_dims) >= 0),
-             "Chunk size set");
-
-        /* Add test filter to the pipeline */
-        VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+        HDputs("Testing write to shared filtered chunks in Compound Datatype dataset without Datatype "
+               "conversion");
 
-        dset_id = H5Dcreate2(file_id, READ_ONE_CHUNK_FILTERED_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
-                             H5P_DEFAULT, plist_id, H5P_DEFAULT);
-        VRFY((dset_id >= 0), "Dataset creation succeeded");
+    /* SZIP and ScaleOffset filters don't support compound types */
+    if (filter_id == H5Z_FILTER_SZIP || filter_id == H5Z_FILTER_SCALEOFFSET) {
+        if (MAINPROCESS)
+            SKIPPED();
+        return;
+    }
 
-        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
-        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
+    VRFY((file_id >= 0), "Test file open succeeded");
 
-        VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0),
-             "Dataset write succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
-        VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
-        VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
-    }
+    /* Create the dataspace for the dataset */
+    dataset_dims[0] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_NROWS;
+    dataset_dims[1] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_NCOLS;
+    chunk_dims[0]   = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NROWS;
+    chunk_dims[1]   = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NCOLS;
+    sel_dims[0] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NROWS / (hsize_t)mpi_size;
+    sel_dims[1] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_ENTRIES_PER_PROC;
 
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
+    filespace = H5Screate_simple(WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS,
+                                 dataset_dims, NULL);
+    VRFY((filespace >= 0), "File dataspace creation succeeded");
 
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
+    memspace =
+        H5Screate_simple(WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS, sel_dims, NULL);
+    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
 
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
+    /* Create chunked dataset */
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id);
-    VRFY((file_id >= 0), "Test file open succeeded");
+    VRFY((H5Pset_chunk(plist_id, WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS,
+                       chunk_dims) >= 0),
+         "Chunk size set");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    /* Add test filter to the pipeline */
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-    dset_id = H5Dopen2(file_id, "/" READ_ONE_CHUNK_FILTERED_DATASET_NAME, H5P_DEFAULT);
-    VRFY((dset_id >= 0), "Dataset open succeeded");
+    /* Create the compound type for memory. */
+    memtype = H5Tcreate(H5T_COMPOUND, sizeof(COMPOUND_C_DATATYPE));
+    VRFY((memtype >= 0), "Datatype creation succeeded");
 
-    sel_dims[0] = (hsize_t)READ_ONE_CHUNK_FILTERED_DATASET_NROWS / (hsize_t)mpi_size;
-    sel_dims[1] = (hsize_t)READ_ONE_CHUNK_FILTERED_DATASET_NCOLS;
+    VRFY((H5Tinsert(memtype, "ShortData", HOFFSET(COMPOUND_C_DATATYPE, field1), H5T_NATIVE_SHORT) >= 0),
+         "Datatype insertion succeeded");
+    VRFY((H5Tinsert(memtype, "IntData", HOFFSET(COMPOUND_C_DATATYPE, field2), H5T_NATIVE_INT) >= 0),
+         "Datatype insertion succeeded");
+    VRFY((H5Tinsert(memtype, "LongData", HOFFSET(COMPOUND_C_DATATYPE, field3), H5T_NATIVE_LONG) >= 0),
+         "Datatype insertion succeeded");
 
-    /* Setup one-dimensional memory dataspace for reading the dataset data into a contiguous buffer */
-    flat_dims[0] = sel_dims[0] * sel_dims[1];
+    dset_id = H5Dcreate2(group_id, WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_NAME, memtype,
+                         filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    memspace = H5Screate_simple(1, flat_dims, NULL);
-    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
 
-    /* Select hyperslab in the file */
-    filespace = H5Dget_space(dset_id);
-    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
-    /*
-     * Each process defines the dataset selection in the file and
-     * reads it to the selection in memory
+    /* Each process defines the dataset selection in memory and writes
+     * it to the hyperslab in the file
      */
     count[0]  = 1;
-    count[1]  = 1;
-    stride[0] = (hsize_t)READ_ONE_CHUNK_FILTERED_DATASET_CH_NROWS;
-    stride[1] = (hsize_t)READ_ONE_CHUNK_FILTERED_DATASET_CH_NCOLS;
-    block[0]  = sel_dims[0];
-    block[1]  = sel_dims[1];
-    start[0]  = ((hsize_t)mpi_rank * sel_dims[0]);
+    count[1]  = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_ENTRIES_PER_PROC;
+    stride[0] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NROWS;
+    stride[1] = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NCOLS;
+    block[0]  = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NROWS / (hsize_t)mpi_size;
+    block[1]  = WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NCOLS;
+    start[0]  = (hsize_t)mpi_rank;
     start[1]  = 0;
 
     if (VERBOSE_MED) {
-        HDprintf("Process %d is reading with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
                  ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
                  ", %" PRIuHSIZE " ]\n",
                  mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
         HDfflush(stdout);
     }
 
+    /* Select hyperslab in the file */
+    filespace = H5Dget_space(dset_id);
+    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+
     VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
          "Hyperslab selection succeeded");
 
-    /* Create property list for collective dataset read */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
+    data = (COMPOUND_C_DATATYPE *)HDcalloc(
+        1, (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_ENTRIES_PER_PROC * sizeof(*data));
+    VRFY((NULL != data), "HDcalloc succeeded");
 
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
+    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(COMPOUND_C_DATATYPE);
 
-    read_buf_size = flat_dims[0] * sizeof(*read_buf);
+    correct_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != correct_buf), "HDcalloc succeeded");
 
-    read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size);
-    VRFY((NULL != read_buf), "HDcalloc succeeded");
+    /* Fill data buffer */
+    for (i = 0; i < (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_ENTRIES_PER_PROC; i++) {
+        data[i].field1 = (short)GEN_DATA(i);
+        data[i].field2 = (int)GEN_DATA(i);
+        data[i].field3 = (long)GEN_DATA(i);
+    }
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0),
-         "Dataset read succeeded");
+    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++) {
+        correct_buf[i].field1 =
+            (short)((dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
+                    (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
 
-    global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != global_buf), "HDcalloc succeeded");
+        correct_buf[i].field2 =
+            (int)((dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
+                  (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
 
-    /* Collect each piece of data from all ranks into a global buffer on all ranks */
-    recvcounts = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
-    VRFY((NULL != recvcounts), "HDcalloc succeeded");
+        correct_buf[i].field3 =
+            (long)((dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
+                   (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
+    }
 
-    for (i = 0; i < (size_t)mpi_size; i++)
-        recvcounts[i] = (int)flat_dims[0];
+    VRFY((H5Dwrite(dset_id, memtype, memspace, filespace, dxpl_id, data) >= 0), "Dataset write succeeded");
 
-    displs = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
-    VRFY((NULL != displs), "HDcalloc succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
 
-    for (i = 0; i < (size_t)mpi_size; i++)
-        displs[i] = (int)(i * flat_dims[0]);
+    if (data)
+        HDfree(data);
 
-    VRFY((MPI_SUCCESS == MPI_Allgatherv(read_buf, (int)flat_dims[0], C_DATATYPE_MPI, global_buf, recvcounts,
-                                        displs, C_DATATYPE_MPI, comm)),
-         "MPI_Allgatherv succeeded");
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
-    VRFY((0 == HDmemcmp(global_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+    /* Verify the correct data was written */
+    read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != read_buf), "HDcalloc succeeded");
+
+    dset_id =
+        H5Dopen2(group_id, WRITE_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_NAME, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
+
+    VRFY((H5Dread(dset_id, memtype, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), "Dataset read succeeded");
+
+    VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
 
-    if (displs)
-        HDfree(displs);
-    if (recvcounts)
-        HDfree(recvcounts);
-    if (global_buf)
-        HDfree(global_buf);
-    if (read_buf)
-        HDfree(read_buf);
     if (correct_buf)
         HDfree(correct_buf);
+    if (read_buf)
+        HDfree(read_buf);
 
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Tclose(memtype) >= 0), "Datatype close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
 
 /*
- * Tests parallel read of filtered data in the case where only
- * one process is reading from a particular chunk in the operation.
+ * Tests parallel write of filtered data to unshared
+ * chunks using a compound datatype which requires a
+ * datatype conversion.
  *
- * The MAINPROCESS rank will first write out all of the
- * data to the dataset. Then, each rank reads a part of
- * the dataset and contributes its piece to a global buffer
- * that is checked for consistency.
+ * NOTE: This test currently should fail for mpi_size > 1
+ * because the datatype conversion causes the parallel
+ * library to break to independent I/O and this isn't
+ * allowed when there are filters in the pipeline,
+ * unless there is only one MPI rank.
  *
  * Programmer: Jordan Henderson
- *             05/15/2018
+ *             02/07/2017
  */
 static void
-test_read_filtered_dataset_no_overlap(void)
+test_write_cmpd_filtered_dataset_type_conversion_unshared(const char *parent_group, H5Z_filter_t filter_id,
+                                                          hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id)
 {
-    C_DATATYPE *read_buf    = NULL;
-    C_DATATYPE *correct_buf = NULL;
-    C_DATATYPE *global_buf  = NULL;
-    hsize_t     dataset_dims[READ_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     chunk_dims[READ_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     sel_dims[READ_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     start[READ_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     stride[READ_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     count[READ_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     block[READ_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     flat_dims[1];
-    size_t      i, read_buf_size, correct_buf_size;
-    hid_t       file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t       filespace = -1, memspace = -1;
-    int *       recvcounts = NULL;
-    int *       displs     = NULL;
+    COMPOUND_C_DATATYPE *data        = NULL;
+    COMPOUND_C_DATATYPE *read_buf    = NULL;
+    COMPOUND_C_DATATYPE *correct_buf = NULL;
+    hsize_t              dataset_dims[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              chunk_dims[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              sel_dims[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              start[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              stride[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              count[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              block[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
+    size_t               i, correct_buf_size;
+    hid_t                file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID,
+          filetype = H5I_INVALID_HID, memtype = H5I_INVALID_HID;
+    hid_t group_id  = H5I_INVALID_HID;
+    hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
-        HDputs("Testing read from unshared filtered chunks");
+        HDputs("Testing write to unshared filtered chunks in Compound Datatype dataset with Datatype "
+               "conversion");
 
-    CHECK_CUR_FILTER_AVAIL();
+    /* Skip for MPI communicator size of 1; the write below is only expected to fail with multiple ranks */
+    if (mpi_size == 1) {
+        SKIPPED();
+        return;
+    }
 
-    dataset_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_NROWS;
-    dataset_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_NCOLS;
+    /* SZIP and ScaleOffset filters don't support compound types */
+    if (filter_id == H5Z_FILTER_SZIP || filter_id == H5Z_FILTER_SCALEOFFSET) {
+        if (MAINPROCESS)
+            SKIPPED();
+        return;
+    }
 
-    /* Setup the buffer for writing and for comparison */
-    correct_buf_size = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_NROWS *
-                       (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_NCOLS * sizeof(*correct_buf);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
+    VRFY((file_id >= 0), "Test file open succeeded");
 
-    correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != correct_buf), "HDcalloc succeeded");
-
-    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
-        correct_buf[i] = (C_DATATYPE)((i % (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])) +
-                                      (i / (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])));
-
-    if (MAINPROCESS) {
-        plist_id = H5Pcreate(H5P_FILE_ACCESS);
-        VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-        VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-             "Set libver bounds succeeded");
-
-        file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
-        VRFY((file_id >= 0), "Test file open succeeded");
-
-        VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
-
-        /* Create the dataspace for the dataset */
-        filespace = H5Screate_simple(READ_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
-        VRFY((filespace >= 0), "File dataspace creation succeeded");
-
-        /* Create chunked dataset */
-        chunk_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_CH_NROWS;
-        chunk_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_CH_NCOLS;
-
-        plist_id = H5Pcreate(H5P_DATASET_CREATE);
-        VRFY((plist_id >= 0), "DCPL creation succeeded");
-
-        VRFY((H5Pset_chunk(plist_id, READ_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
-             "Chunk size set");
-
-        /* Add test filter to the pipeline */
-        VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
-        dset_id = H5Dcreate2(file_id, READ_UNSHARED_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME,
-                             filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
-        VRFY((dset_id >= 0), "Dataset creation succeeded");
-
-        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
-        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+    /* Create the dataspace for the dataset */
+    dataset_dims[0] = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_NROWS;
+    dataset_dims[1] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_NCOLS;
+    chunk_dims[0]   = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NROWS;
+    chunk_dims[1]   = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NCOLS;
+    sel_dims[0]     = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NROWS;
+    sel_dims[1]     = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_ENTRIES_PER_PROC;
 
-        VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0),
-             "Dataset write succeeded");
+    filespace = H5Screate_simple(WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS,
+                                 dataset_dims, NULL);
+    VRFY((filespace >= 0), "File dataspace creation succeeded");
 
-        VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
-        VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
-    }
+    memspace = H5Screate_simple(WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS,
+                                sel_dims, NULL);
+    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
 
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
+    /* Create chunked dataset */
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
+    VRFY((H5Pset_chunk(plist_id, WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS,
+                       chunk_dims) >= 0),
+         "Chunk size set");
 
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
+    /* Add test filter to the pipeline */
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id);
-    VRFY((file_id >= 0), "Test file open succeeded");
+    /* Create the compound type for memory. */
+    memtype = H5Tcreate(H5T_COMPOUND, sizeof(COMPOUND_C_DATATYPE));
+    VRFY((memtype >= 0), "Datatype creation succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    VRFY((H5Tinsert(memtype, "ShortData", HOFFSET(COMPOUND_C_DATATYPE, field1), H5T_NATIVE_SHORT) >= 0),
+         "Datatype insertion succeeded");
+    VRFY((H5Tinsert(memtype, "IntData", HOFFSET(COMPOUND_C_DATATYPE, field2), H5T_NATIVE_INT) >= 0),
+         "Datatype insertion succeeded");
+    VRFY((H5Tinsert(memtype, "LongData", HOFFSET(COMPOUND_C_DATATYPE, field3), H5T_NATIVE_LONG) >= 0),
+         "Datatype insertion succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" READ_UNSHARED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
-    VRFY((dset_id >= 0), "Dataset open succeeded");
+    /* Create the compound type for file; its I64BE members differ from memtype to force a conversion */
+    filetype = H5Tcreate(H5T_COMPOUND, 32);
+    VRFY((filetype >= 0), "Datatype creation succeeded");
 
-    sel_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_CH_NROWS;
-    sel_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_NCOLS;
+    VRFY((H5Tinsert(filetype, "ShortData", 0, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
+    VRFY((H5Tinsert(filetype, "IntData", 8, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
+    VRFY((H5Tinsert(filetype, "LongData", 16, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
 
-    /* Setup one-dimensional memory dataspace for reading the dataset data into a contiguous buffer */
-    flat_dims[0] = sel_dims[0] * sel_dims[1];
+    dset_id = H5Dcreate2(group_id, WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_NAME,
+                         filetype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    memspace = H5Screate_simple(1, flat_dims, NULL);
-    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
 
-    /* Select hyperslab in the file */
-    filespace = H5Dget_space(dset_id);
-    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
-    /*
-     * Each process defines the dataset selection in the file and reads
-     * it to the selection in memory
+    /* Each process defines the dataset selection in memory and writes
+     * it to the hyperslab in the file
      */
-    count[0] = 1;
-    count[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_NCOLS / (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_CH_NCOLS;
-    stride[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_CH_NROWS;
-    stride[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_CH_NCOLS;
-    block[0]  = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_CH_NROWS;
-    block[1]  = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_CH_NCOLS;
-    start[0]  = ((hsize_t)mpi_rank * (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_CH_NROWS * count[0]);
-    start[1]  = 0;
+    count[0]  = 1;
+    count[1]  = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_ENTRIES_PER_PROC;
+    stride[0] = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NROWS;
+    stride[1] = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NCOLS;
+    block[0]  = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NROWS;
+    block[1]  = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NCOLS;
+    start[0]  = 0;
+    start[1]  = ((hsize_t)mpi_rank * WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NCOLS);
 
     if (VERBOSE_MED) {
-        HDprintf("Process %d is reading with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
                  ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
                  ", %" PRIuHSIZE " ]\n",
                  mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
         HDfflush(stdout);
     }
 
+    /* Select hyperslab in the file */
+    filespace = H5Dget_space(dset_id);
+    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+
     VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
          "Hyperslab selection succeeded");
 
-    /* Create property list for collective dataset read */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
+    data = (COMPOUND_C_DATATYPE *)HDcalloc(
+        1, (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_ENTRIES_PER_PROC * sizeof(*data));
+    VRFY((NULL != data), "HDcalloc succeeded");
 
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
+    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(COMPOUND_C_DATATYPE);
 
-    read_buf_size = flat_dims[0] * sizeof(*read_buf);
+    correct_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != correct_buf), "HDcalloc succeeded");
 
-    read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size);
-    VRFY((NULL != read_buf), "HDcalloc succeeded");
+    /* Fill data buffer */
+    for (i = 0; i < (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_ENTRIES_PER_PROC; i++) {
+        data[i].field1 = (short)GEN_DATA(i);
+        data[i].field2 = (int)GEN_DATA(i);
+        data[i].field3 = (long)GEN_DATA(i);
+    }
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0),
-         "Dataset read succeeded");
+    /* Ensure that the write currently fails since type conversions break collective mode */
+    H5E_BEGIN_TRY
+    {
+        VRFY((H5Dwrite(dset_id, memtype, memspace, filespace, dxpl_id, data) < 0), "Dataset write failed as expected");
+    }
+    H5E_END_TRY;
 
-    global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != global_buf), "HDcalloc succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, NO_CHUNKS_WRITTEN);
 
-    /* Collect each piece of data from all ranks into a global buffer on all ranks */
-    recvcounts = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
-    VRFY((NULL != recvcounts), "HDcalloc succeeded");
+    if (data)
+        HDfree(data);
 
-    for (i = 0; i < (size_t)mpi_size; i++)
-        recvcounts[i] = (int)flat_dims[0];
+    /* Verify that no data was written: the re-read dataset must match the zero-filled correct_buf */
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
-    displs = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
-    VRFY((NULL != displs), "HDcalloc succeeded");
+    read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    for (i = 0; i < (size_t)mpi_size; i++)
-        displs[i] = (int)(i * flat_dims[0]);
+    dset_id =
+        H5Dopen2(group_id, WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_NAME, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    VRFY((MPI_SUCCESS == MPI_Allgatherv(read_buf, (int)flat_dims[0], C_DATATYPE_MPI, global_buf, recvcounts,
-                                        displs, C_DATATYPE_MPI, comm)),
-         "MPI_Allgatherv succeeded");
+    VRFY((H5Dread(dset_id, memtype, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), "Dataset read succeeded");
 
-    VRFY((0 == HDmemcmp(global_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+    VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
 
-    if (displs)
-        HDfree(displs);
-    if (recvcounts)
-        HDfree(recvcounts);
-    if (global_buf)
-        HDfree(global_buf);
-    if (read_buf)
-        HDfree(read_buf);
     if (correct_buf)
         HDfree(correct_buf);
+    if (read_buf)
+        HDfree(read_buf);
 
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Tclose(filetype) >= 0), "File datatype close succeeded");
+    VRFY((H5Tclose(memtype) >= 0), "Memory datatype close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
 
 /*
- * Tests parallel read of filtered data in the case where
- * more than one process is reading from a particular chunk
- * in the operation.
+ * Tests parallel write of filtered data to shared
+ * chunks using a compound datatype which requires
+ * a datatype conversion.
  *
- * The MAINPROCESS rank will first write out all of the
- * data to the dataset. Then, each rank reads a part of
- * each chunk of the dataset and contributes its pieces
- * to a global buffer that is checked for consistency.
+ * NOTE: This test currently should fail for mpi_size > 1
+ * because the datatype conversion causes the parallel
+ * library to break to independent I/O and this isn't
+ * allowed when there are filters in the pipeline,
+ * unless there is only one MPI rank.
  *
  * Programmer: Jordan Henderson
- *             05/15/2018
+ *             02/10/2017
  */
 static void
-test_read_filtered_dataset_overlap(void)
+test_write_cmpd_filtered_dataset_type_conversion_shared(const char *parent_group, H5Z_filter_t filter_id,
+                                                        hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id)
 {
-    C_DATATYPE *read_buf    = NULL;
-    C_DATATYPE *correct_buf = NULL;
-    C_DATATYPE *global_buf  = NULL;
-    hsize_t     dataset_dims[READ_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     chunk_dims[READ_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     sel_dims[READ_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     start[READ_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     stride[READ_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     count[READ_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     block[READ_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     flat_dims[1];
-    size_t      i, read_buf_size, correct_buf_size;
-    hid_t       file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t       filespace = -1, memspace = -1;
-    int *       recvcounts = NULL;
-    int *       displs     = NULL;
+    COMPOUND_C_DATATYPE *data        = NULL;
+    COMPOUND_C_DATATYPE *read_buf    = NULL;
+    COMPOUND_C_DATATYPE *correct_buf = NULL;
+    hsize_t              dataset_dims[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              chunk_dims[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              sel_dims[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              start[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              stride[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              count[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              block[WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
+    size_t               i, correct_buf_size;
+    hid_t                file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t                filetype = H5I_INVALID_HID, memtype = H5I_INVALID_HID;
+    hid_t                group_id  = H5I_INVALID_HID;
+    hid_t                filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
-        HDputs("Testing read from shared filtered chunks");
+        HDputs(
+            "Testing write to shared filtered chunks in Compound Datatype dataset with Datatype conversion");
 
-    CHECK_CUR_FILTER_AVAIL();
+    /* Skip for MPI communicator size of 1 */
+    if (mpi_size == 1) {
+        SKIPPED();
+        return;
+    }
 
-    dataset_dims[0] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_NROWS;
-    dataset_dims[1] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_NCOLS;
+    /* SZIP and ScaleOffset filters don't support compound types */
+    if (filter_id == H5Z_FILTER_SZIP || filter_id == H5Z_FILTER_SCALEOFFSET) {
+        if (MAINPROCESS)
+            SKIPPED();
+        return;
+    }
 
-    /* Setup the buffer for writing and for comparison */
-    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
+    VRFY((file_id >= 0), "Test file open succeeded");
 
-    correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != correct_buf), "HDcalloc succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
-    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
-        correct_buf[i] = (C_DATATYPE)(
-            (dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
-            (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
+    /* Create the dataspace for the dataset */
+    dataset_dims[0] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_NROWS;
+    dataset_dims[1] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_NCOLS;
+    chunk_dims[0]   = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NROWS;
+    chunk_dims[1]   = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NCOLS;
+    sel_dims[0] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NROWS / (hsize_t)mpi_size;
+    sel_dims[1] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_ENTRIES_PER_PROC;
 
-    if (MAINPROCESS) {
-        plist_id = H5Pcreate(H5P_FILE_ACCESS);
-        VRFY((plist_id >= 0), "FAPL creation succeeded");
+    filespace = H5Screate_simple(WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS,
+                                 dataset_dims, NULL);
+    VRFY((filespace >= 0), "File dataspace creation succeeded");
 
-        VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-             "Set libver bounds succeeded");
+    memspace =
+        H5Screate_simple(WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS, sel_dims, NULL);
+    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
 
-        file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
-        VRFY((file_id >= 0), "Test file open succeeded");
+    /* Create chunked dataset */
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-        VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    VRFY((H5Pset_chunk(plist_id, WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS,
+                       chunk_dims) >= 0),
+         "Chunk size set");
 
-        /* Create the dataspace for the dataset */
-        filespace = H5Screate_simple(READ_SHARED_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
-        VRFY((filespace >= 0), "File dataspace creation succeeded");
-
-        /* Create chunked dataset */
-        chunk_dims[0] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_CH_NROWS;
-        chunk_dims[1] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_CH_NCOLS;
-
-        plist_id = H5Pcreate(H5P_DATASET_CREATE);
-        VRFY((plist_id >= 0), "DCPL creation succeeded");
-
-        VRFY((H5Pset_chunk(plist_id, READ_SHARED_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
-             "Chunk size set");
-
-        /* Add test filter to the pipeline */
-        VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
-
-        dset_id = H5Dcreate2(file_id, READ_SHARED_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
-                             H5P_DEFAULT, plist_id, H5P_DEFAULT);
-        VRFY((dset_id >= 0), "Dataset creation succeeded");
-
-        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
-        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
-
-        VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0),
-             "Dataset write succeeded");
-
-        VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
-        VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
-    }
-
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
+    /* Add test filter to the pipeline */
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id);
-    VRFY((file_id >= 0), "Test file open succeeded");
+    /* Create the compound type for memory. */
+    memtype = H5Tcreate(H5T_COMPOUND, sizeof(COMPOUND_C_DATATYPE));
+    VRFY((memtype >= 0), "Datatype creation succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    VRFY((H5Tinsert(memtype, "ShortData", HOFFSET(COMPOUND_C_DATATYPE, field1), H5T_NATIVE_SHORT) >= 0),
+         "Datatype insertion succeeded");
+    VRFY((H5Tinsert(memtype, "IntData", HOFFSET(COMPOUND_C_DATATYPE, field2), H5T_NATIVE_INT) >= 0),
+         "Datatype insertion succeeded");
+    VRFY((H5Tinsert(memtype, "LongData", HOFFSET(COMPOUND_C_DATATYPE, field3), H5T_NATIVE_LONG) >= 0),
+         "Datatype insertion succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" READ_SHARED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
-    VRFY((dset_id >= 0), "Dataset open succeeded");
+    /* Create the compound type for file. */
+    filetype = H5Tcreate(H5T_COMPOUND, 32);
+    VRFY((filetype >= 0), "Datatype creation succeeded");
 
-    sel_dims[0] = (hsize_t)DIM0_SCALE_FACTOR;
-    sel_dims[1] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_CH_NCOLS * (hsize_t)DIM1_SCALE_FACTOR;
+    VRFY((H5Tinsert(filetype, "ShortData", 0, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
+    VRFY((H5Tinsert(filetype, "IntData", 8, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
+    VRFY((H5Tinsert(filetype, "LongData", 16, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
 
-    /* Setup one-dimensional memory dataspace for reading the dataset data into a contiguous buffer */
-    flat_dims[0] = sel_dims[0] * sel_dims[1];
+    dset_id = H5Dcreate2(group_id, WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_NAME,
+                         filetype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    memspace = H5Screate_simple(1, flat_dims, NULL);
-    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
 
-    /* Select hyperslab in the file */
-    filespace = H5Dget_space(dset_id);
-    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
-    /*
-     * Each process defines the dataset selection in the file and
-     * reads it to the selection in memory
+    /* Each process defines the dataset selection in memory and writes
+     * it to the hyperslab in the file
      */
-    count[0]  = (hsize_t)READ_SHARED_FILTERED_CHUNKS_NROWS / (hsize_t)READ_SHARED_FILTERED_CHUNKS_CH_NROWS;
-    count[1]  = (hsize_t)READ_SHARED_FILTERED_CHUNKS_NCOLS / (hsize_t)READ_SHARED_FILTERED_CHUNKS_CH_NCOLS;
-    stride[0] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_CH_NROWS;
-    stride[1] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_CH_NCOLS;
-    block[0]  = (hsize_t)READ_SHARED_FILTERED_CHUNKS_CH_NROWS / (hsize_t)mpi_size;
-    block[1]  = (hsize_t)READ_SHARED_FILTERED_CHUNKS_CH_NCOLS;
-    start[0]  = (hsize_t)mpi_rank * block[0];
+    count[0]  = 1;
+    count[1]  = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_ENTRIES_PER_PROC;
+    stride[0] = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NROWS;
+    stride[1] = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NCOLS;
+    block[0]  = (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NROWS / (hsize_t)mpi_size;
+    block[1]  = WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NCOLS;
+    start[0]  = (hsize_t)mpi_rank;
     start[1]  = 0;
 
     if (VERBOSE_MED) {
-        HDprintf("Process %d is reading with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
                  ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
                  ", %" PRIuHSIZE " ]\n",
                  mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
         HDfflush(stdout);
     }
 
+    /* Select hyperslab in the file */
+    filespace = H5Dget_space(dset_id);
+    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+
     VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
          "Hyperslab selection succeeded");
 
-    /* Create property list for collective dataset read */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
-
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
-
-    read_buf_size = flat_dims[0] * sizeof(*read_buf);
+    data = (COMPOUND_C_DATATYPE *)HDcalloc(
+        1, (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_ENTRIES_PER_PROC * sizeof(*data));
+    VRFY((NULL != data), "HDcalloc succeeded");
 
-    read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size);
-    VRFY((NULL != read_buf), "HDcalloc succeeded");
+    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(COMPOUND_C_DATATYPE);
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0),
-         "Dataset read succeeded");
+    correct_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != correct_buf), "HDcalloc succeeded");
 
-    global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != global_buf), "HDcalloc succeeded");
+    /* Fill data buffer */
+    for (i = 0; i < (hsize_t)WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_ENTRIES_PER_PROC; i++) {
+        data[i].field1 = (short)GEN_DATA(i);
+        data[i].field2 = (int)GEN_DATA(i);
+        data[i].field3 = (long)GEN_DATA(i);
+    }
 
-    /*
-     * Since these chunks are shared, run multiple rounds of MPI_Allgatherv
-     * to collect all of the pieces into their appropriate locations. The
-     * number of times MPI_Allgatherv is run should be equal to the number
-     * of chunks in the first dimension of the dataset.
-     */
+    /* Expect the write to fail for now, since type conversions currently break collective mode */
+    H5E_BEGIN_TRY
     {
-        size_t loop_count       = count[0];
-        size_t total_recvcounts = 0;
+        VRFY((H5Dwrite(dset_id, memtype, memspace, filespace, dxpl_id, data) < 0), "Dataset write succeeded");
+    }
+    H5E_END_TRY;
 
-        recvcounts = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
-        VRFY((NULL != recvcounts), "HDcalloc succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, NO_CHUNKS_WRITTEN);
 
-        displs = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
-        VRFY((NULL != displs), "HDcalloc succeeded");
+    if (data)
+        HDfree(data);
 
-        for (i = 0; i < (size_t)mpi_size; i++) {
-            recvcounts[i] = (int)dataset_dims[1];
-            total_recvcounts += (size_t)recvcounts[i];
-        }
+    /* Close and re-open the dataset, then read it back to verify that no data was written */
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
-        for (i = 0; i < (size_t)mpi_size; i++)
-            displs[i] = (int)(i * dataset_dims[1]);
+    read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-        for (; loop_count; loop_count--) {
-            VRFY((MPI_SUCCESS == MPI_Allgatherv(&read_buf[(count[0] - loop_count) * dataset_dims[1]],
-                                                recvcounts[mpi_rank], C_DATATYPE_MPI,
-                                                &global_buf[(count[0] - loop_count) * total_recvcounts],
-                                                recvcounts, displs, C_DATATYPE_MPI, comm)),
-                 "MPI_Allgatherv succeeded");
-        }
-    }
+    dset_id =
+        H5Dopen2(group_id, WRITE_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_NAME, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    VRFY((0 == HDmemcmp(global_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+    VRFY((H5Dread(dset_id, memtype, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), "Dataset read succeeded");
+
+    VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
 
-    if (displs)
-        HDfree(displs);
-    if (recvcounts)
-        HDfree(recvcounts);
-    if (global_buf)
-        HDfree(global_buf);
-    if (read_buf)
-        HDfree(read_buf);
     if (correct_buf)
         HDfree(correct_buf);
+    if (read_buf)
+        HDfree(read_buf);
 
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Tclose(filetype) >= 0), "File datatype close succeeded");
+    VRFY((H5Tclose(memtype) >= 0), "Memory datatype close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
+#endif
 
 /*
- * Tests parallel read of filtered data in the case where
- * a single process in the read operation has no selection
- * in the dataset's dataspace.
+ * Tests parallel read of filtered data in the special
+ * case where a dataset is composed of a single chunk.
  *
  * The MAINPROCESS rank will first write out all of the
- * data to the dataset. Then, each rank (except for one)
- * reads a part of the dataset and contributes its piece
- * to a global buffer that is checked for consistency.
+ * data to the dataset. Then, each rank reads a part of
+ * the singular chunk and contributes its piece to a
+ * global buffer that is checked for consistency.
  *
  * Programmer: Jordan Henderson
- *             05/15/2018
+ *             05/14/2018
  */
 static void
-test_read_filtered_dataset_single_no_selection(void)
+test_read_one_chunk_filtered_dataset(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                     hid_t dcpl_id, hid_t dxpl_id)
 {
     C_DATATYPE *read_buf    = NULL;
     C_DATATYPE *correct_buf = NULL;
     C_DATATYPE *global_buf  = NULL;
-    hsize_t     dataset_dims[READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     chunk_dims[READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     sel_dims[READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     start[READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     stride[READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     count[READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     block[READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     dataset_dims[READ_ONE_CHUNK_FILTERED_DATASET_DIMS];
+    hsize_t     chunk_dims[READ_ONE_CHUNK_FILTERED_DATASET_DIMS];
+    hsize_t     sel_dims[READ_ONE_CHUNK_FILTERED_DATASET_DIMS];
+    hsize_t     start[READ_ONE_CHUNK_FILTERED_DATASET_DIMS];
+    hsize_t     stride[READ_ONE_CHUNK_FILTERED_DATASET_DIMS];
+    hsize_t     count[READ_ONE_CHUNK_FILTERED_DATASET_DIMS];
+    hsize_t     block[READ_ONE_CHUNK_FILTERED_DATASET_DIMS];
     hsize_t     flat_dims[1];
     size_t      i, read_buf_size, correct_buf_size;
-    size_t      segment_length;
-    hid_t       file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t       filespace = -1, memspace = -1;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
     int *       recvcounts = NULL;
     int *       displs     = NULL;
 
     if (MAINPROCESS)
-        HDputs("Testing read from filtered chunks with a single process having no selection");
-
-    CHECK_CUR_FILTER_AVAIL();
+        HDputs("Testing read from one-chunk filtered dataset");
 
-    dataset_dims[0] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NROWS;
-    dataset_dims[1] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NCOLS;
+    dataset_dims[0] = (hsize_t)READ_ONE_CHUNK_FILTERED_DATASET_NROWS;
+    dataset_dims[1] = (hsize_t)READ_ONE_CHUNK_FILTERED_DATASET_NCOLS;
 
     /* Setup the buffer for writing and for comparison */
     correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
@@ -3417,13 +3735,10 @@ test_read_filtered_dataset_single_no_selection(void)
     VRFY((NULL != correct_buf), "HDcalloc succeeded");
 
     for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
-        correct_buf[i] = (C_DATATYPE)((i % (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])) +
-                                      (i / (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])));
-
-    /* Compute the correct offset into the buffer for the process having no selection and clear it */
-    segment_length = dataset_dims[0] * dataset_dims[1] / (hsize_t)mpi_size;
-    HDmemset(correct_buf + ((size_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NO_SELECT_PROC * segment_length),
-             0, segment_length * sizeof(*correct_buf));
+        correct_buf[i] = ((C_DATATYPE)i % (READ_ONE_CHUNK_FILTERED_DATASET_CH_NROWS / mpi_size *
+                                           READ_ONE_CHUNK_FILTERED_DATASET_CH_NCOLS)) +
+                         ((C_DATATYPE)i / (READ_ONE_CHUNK_FILTERED_DATASET_CH_NROWS / mpi_size *
+                                           READ_ONE_CHUNK_FILTERED_DATASET_CH_NCOLS));
 
     if (MAINPROCESS) {
         plist_id = H5Pcreate(H5P_FILE_ACCESS);
@@ -3437,60 +3752,58 @@ test_read_filtered_dataset_single_no_selection(void)
 
         VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
 
+        group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+        VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
         /* Create the dataspace for the dataset */
-        filespace =
-            H5Screate_simple(READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
+        filespace = H5Screate_simple(READ_ONE_CHUNK_FILTERED_DATASET_DIMS, dataset_dims, NULL);
         VRFY((filespace >= 0), "File dataspace creation succeeded");
 
         /* Create chunked dataset */
-        chunk_dims[0] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS;
-        chunk_dims[1] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
+        chunk_dims[0] = (hsize_t)READ_ONE_CHUNK_FILTERED_DATASET_CH_NROWS;
+        chunk_dims[1] = (hsize_t)READ_ONE_CHUNK_FILTERED_DATASET_CH_NCOLS;
 
-        plist_id = H5Pcreate(H5P_DATASET_CREATE);
-        VRFY((plist_id >= 0), "DCPL creation succeeded");
+        plist_id = H5Pcopy(dcpl_id);
+        VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-        VRFY((H5Pset_chunk(plist_id, READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
+        VRFY((H5Pset_chunk(plist_id, READ_ONE_CHUNK_FILTERED_DATASET_DIMS, chunk_dims) >= 0),
              "Chunk size set");
 
         /* Add test filter to the pipeline */
-        VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+        VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-        dset_id = H5Dcreate2(file_id, READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME,
-                             HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+        dset_id = H5Dcreate2(group_id, READ_ONE_CHUNK_FILTERED_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
+                             H5P_DEFAULT, plist_id, H5P_DEFAULT);
         VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
         VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
         VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0),
              "Dataset write succeeded");
 
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
+        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
         VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+        VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
         VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
     }
 
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
-
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
+    dset_id = H5Dopen2(group_id, READ_ONE_CHUNK_FILTERED_DATASET_NAME, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    sel_dims[0] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS;
-    sel_dims[1] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NCOLS;
-
-    if (mpi_rank == READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NO_SELECT_PROC)
-        sel_dims[0] = sel_dims[1] = 0;
+    sel_dims[0] = (hsize_t)READ_ONE_CHUNK_FILTERED_DATASET_NROWS / (hsize_t)mpi_size;
+    sel_dims[1] = (hsize_t)READ_ONE_CHUNK_FILTERED_DATASET_NCOLS;
 
     /* Setup one-dimensional memory dataspace for reading the dataset data into a contiguous buffer */
     flat_dims[0] = sel_dims[0] * sel_dims[1];
@@ -3506,14 +3819,13 @@ test_read_filtered_dataset_single_no_selection(void)
      * Each process defines the dataset selection in the file and
      * reads it to the selection in memory
      */
-    count[0] = 1;
-    count[1] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NCOLS /
-               (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
-    stride[0] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS;
-    stride[1] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
-    block[0]  = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS;
-    block[1]  = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
-    start[0]  = (hsize_t)mpi_rank * (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS * count[0];
+    count[0]  = 1;
+    count[1]  = 1;
+    stride[0] = (hsize_t)READ_ONE_CHUNK_FILTERED_DATASET_CH_NROWS;
+    stride[1] = (hsize_t)READ_ONE_CHUNK_FILTERED_DATASET_CH_NCOLS;
+    block[0]  = sel_dims[0];
+    block[1]  = sel_dims[1];
+    start[0]  = ((hsize_t)mpi_rank * sel_dims[0]);
     start[1]  = 0;
 
     if (VERBOSE_MED) {
@@ -3524,24 +3836,15 @@ test_read_filtered_dataset_single_no_selection(void)
         HDfflush(stdout);
     }
 
-    if (mpi_rank == READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NO_SELECT_PROC)
-        VRFY((H5Sselect_none(filespace) >= 0), "Select none succeeded");
-    else
-        VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
-             "Hyperslab selection succeeded");
-
-    /* Create property list for collective dataset read */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
-
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
+    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+         "Hyperslab selection succeeded");
 
     read_buf_size = flat_dims[0] * sizeof(*read_buf);
 
     read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size);
     VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0),
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, read_buf) >= 0),
          "Dataset read succeeded");
 
     global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
@@ -3552,25 +3855,17 @@ test_read_filtered_dataset_single_no_selection(void)
     VRFY((NULL != recvcounts), "HDcalloc succeeded");
 
     for (i = 0; i < (size_t)mpi_size; i++)
-        recvcounts[i] = (int)(READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS *
-                              READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NCOLS);
-    recvcounts[READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NO_SELECT_PROC] = 0;
+        recvcounts[i] = (int)flat_dims[0];
 
     displs = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
     VRFY((NULL != displs), "HDcalloc succeeded");
 
     for (i = 0; i < (size_t)mpi_size; i++)
-        displs[i] = (int)(i * (size_t)(READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS *
-                                       READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NCOLS));
+        displs[i] = (int)(i * flat_dims[0]);
 
-    if (mpi_rank == READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NO_SELECT_PROC)
-        VRFY((MPI_SUCCESS == MPI_Allgatherv(read_buf, 0, C_DATATYPE_MPI, global_buf, recvcounts, displs,
-                                            C_DATATYPE_MPI, comm)),
-             "MPI_Allgatherv succeeded");
-    else
-        VRFY((MPI_SUCCESS == MPI_Allgatherv(read_buf, (int)flat_dims[0], C_DATATYPE_MPI, global_buf,
-                                            recvcounts, displs, C_DATATYPE_MPI, comm)),
-             "MPI_Allgatherv succeeded");
+    VRFY((MPI_SUCCESS == MPI_Allgatherv(read_buf, (int)flat_dims[0], C_DATATYPE_MPI, global_buf, recvcounts,
+                                        displs, C_DATATYPE_MPI, comm)),
+         "MPI_Allgatherv succeeded");
 
     VRFY((0 == HDmemcmp(global_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
 
@@ -3588,52 +3883,63 @@ test_read_filtered_dataset_single_no_selection(void)
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
 
 /*
- * Tests parallel read of filtered data in the case where
- * no process in the read operation has a selection in the
- * dataset's dataspace. This test is to ensure that there
- * are no assertion failures or similar issues due to size
- * 0 allocations and the like.
+ * Tests parallel read of filtered data in the case where only
+ * one process is reading from a particular chunk in the operation.
  *
  * The MAINPROCESS rank will first write out all of the
- * data to the dataset. Then, each rank will simply issue
- * a no-op read.
+ * data to the dataset. Then, each rank reads a part of
+ * the dataset and contributes its piece to a global buffer
+ * that is checked for consistency.
  *
  * Programmer: Jordan Henderson
  *             05/15/2018
  */
 static void
-test_read_filtered_dataset_all_no_selection(void)
+test_read_filtered_dataset_no_overlap(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                      hid_t dcpl_id, hid_t dxpl_id)
 {
     C_DATATYPE *read_buf    = NULL;
     C_DATATYPE *correct_buf = NULL;
-    hsize_t     dataset_dims[READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     chunk_dims[READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     sel_dims[READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    size_t      read_buf_size, correct_buf_size;
-    hid_t       file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t       filespace = -1, memspace = -1;
+    C_DATATYPE *global_buf  = NULL;
+    hsize_t     dataset_dims[READ_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     chunk_dims[READ_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     sel_dims[READ_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     start[READ_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     stride[READ_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     count[READ_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     block[READ_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     flat_dims[1];
+    size_t      i, read_buf_size, correct_buf_size;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
+    int *       recvcounts = NULL;
+    int *       displs     = NULL;
 
     if (MAINPROCESS)
-        HDputs("Testing read from filtered chunks with all processes having no selection");
-
-    CHECK_CUR_FILTER_AVAIL();
+        HDputs("Testing read from unshared filtered chunks");
 
-    dataset_dims[0] = (hsize_t)READ_ALL_NO_SELECTION_FILTERED_CHUNKS_NROWS;
-    dataset_dims[1] = (hsize_t)READ_ALL_NO_SELECTION_FILTERED_CHUNKS_NCOLS;
+    dataset_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_NROWS;
+    dataset_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_NCOLS;
 
     /* Setup the buffer for writing and for comparison */
-    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
+    correct_buf_size = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_NROWS *
+                       (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_NCOLS * sizeof(*correct_buf);
 
     correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != correct_buf), "HDcalloc succeeded");
 
+    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
+        correct_buf[i] = (C_DATATYPE)((i % (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])) +
+                                      (i / (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])));
+
     if (MAINPROCESS) {
         plist_id = H5Pcreate(H5P_FILE_ACCESS);
         VRFY((plist_id >= 0), "FAPL creation succeeded");
@@ -3646,79 +3952,129 @@ test_read_filtered_dataset_all_no_selection(void)
 
         VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
 
+        group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+        VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
         /* Create the dataspace for the dataset */
-        filespace = H5Screate_simple(READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
+        filespace = H5Screate_simple(READ_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
         VRFY((filespace >= 0), "File dataspace creation succeeded");
 
         /* Create chunked dataset */
-        chunk_dims[0] = (hsize_t)READ_ALL_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS;
-        chunk_dims[1] = (hsize_t)READ_ALL_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
+        chunk_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_CH_NROWS;
+        chunk_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_CH_NCOLS;
 
-        plist_id = H5Pcreate(H5P_DATASET_CREATE);
-        VRFY((plist_id >= 0), "DCPL creation succeeded");
+        plist_id = H5Pcopy(dcpl_id);
+        VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-        VRFY((H5Pset_chunk(plist_id, READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
+        VRFY((H5Pset_chunk(plist_id, READ_UNSHARED_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
              "Chunk size set");
 
         /* Add test filter to the pipeline */
-        VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+        VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-        dset_id = H5Dcreate2(file_id, READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME,
+        dset_id = H5Dcreate2(group_id, READ_UNSHARED_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME,
                              filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
         VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
         VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
         VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0),
              "Dataset write succeeded");
 
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
+        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
         VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+        VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
         VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
     }
 
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
-
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
+    dset_id = H5Dopen2(group_id, READ_UNSHARED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    sel_dims[0] = sel_dims[1] = 0;
+    sel_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_CH_NROWS;
+    sel_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_NCOLS;
 
-    memspace = H5Screate_simple(READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, sel_dims, NULL);
+    /* Setup one-dimensional memory dataspace for reading the dataset data into a contiguous buffer */
+    flat_dims[0] = sel_dims[0] * sel_dims[1];
+
+    memspace = H5Screate_simple(1, flat_dims, NULL);
     VRFY((memspace >= 0), "Memory dataspace creation succeeded");
 
     /* Select hyperslab in the file */
     filespace = H5Dget_space(dset_id);
     VRFY((filespace >= 0), "File dataspace retrieval succeeded");
 
-    VRFY((H5Sselect_none(filespace) >= 0), "Select none succeeded");
+    /*
+     * Each process defines the dataset selection in the file and reads
+     * it to the selection in memory
+     */
+    count[0] = 1;
+    count[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_NCOLS / (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_CH_NCOLS;
+    stride[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_CH_NROWS;
+    stride[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_CH_NCOLS;
+    block[0]  = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_CH_NROWS;
+    block[1]  = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_CH_NCOLS;
+    start[0]  = ((hsize_t)mpi_rank * (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_CH_NROWS * count[0]);
+    start[1]  = 0;
 
-    /* Create property list for collective dataset read */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
+    if (VERBOSE_MED) {
+        HDprintf("Process %d is reading with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ]\n",
+                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
+        HDfflush(stdout);
+    }
 
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
+    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+         "Hyperslab selection succeeded");
 
-    read_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*read_buf);
+    read_buf_size = flat_dims[0] * sizeof(*read_buf);
 
     read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size);
     VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0),
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, read_buf) >= 0),
          "Dataset read succeeded");
 
+    global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != global_buf), "HDcalloc succeeded");
+
+    /* Collect each piece of data from all ranks into a global buffer on all ranks */
+    recvcounts = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
+    VRFY((NULL != recvcounts), "HDcalloc succeeded");
+
+    for (i = 0; i < (size_t)mpi_size; i++)
+        recvcounts[i] = (int)flat_dims[0];
+
+    displs = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
+    VRFY((NULL != displs), "HDcalloc succeeded");
+
+    for (i = 0; i < (size_t)mpi_size; i++)
+        displs[i] = (int)(i * flat_dims[0]);
+
+    VRFY((MPI_SUCCESS == MPI_Allgatherv(read_buf, (int)flat_dims[0], C_DATATYPE_MPI, global_buf, recvcounts,
+                                        displs, C_DATATYPE_MPI, comm)),
+         "MPI_Allgatherv succeeded");
+
+    VRFY((0 == HDmemcmp(global_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+
+    if (displs)
+        HDfree(displs);
+    if (recvcounts)
+        HDfree(recvcounts);
+    if (global_buf)
+        HDfree(global_buf);
     if (read_buf)
         HDfree(read_buf);
     if (correct_buf)
@@ -3727,50 +4083,52 @@ test_read_filtered_dataset_all_no_selection(void)
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
 
 /*
- * Tests parallel read of filtered data by using point
- * selections instead of hyperslab selections.
+ * Tests parallel read of filtered data in the case where
+ * more than one process is reading from a particular chunk
+ * in the operation.
  *
  * The MAINPROCESS rank will first write out all of the
- * data to the dataset. Then, each rank will read part
- * of the dataset using a point selection and will
- * contribute its piece to a global buffer that is
- * checked for consistency.
+ * data to the dataset. Then, each rank reads a part of
+ * each chunk of the dataset and contributes its pieces
+ * to a global buffer that is checked for consistency.
  *
  * Programmer: Jordan Henderson
  *             05/15/2018
  */
 static void
-test_read_filtered_dataset_point_selection(void)
+test_read_filtered_dataset_overlap(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                   hid_t dcpl_id, hid_t dxpl_id)
 {
-    C_DATATYPE *correct_buf = NULL;
     C_DATATYPE *read_buf    = NULL;
+    C_DATATYPE *correct_buf = NULL;
     C_DATATYPE *global_buf  = NULL;
-    hsize_t *   coords      = NULL;
-    hsize_t     dataset_dims[READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     chunk_dims[READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     sel_dims[READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     dataset_dims[READ_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     chunk_dims[READ_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     sel_dims[READ_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     start[READ_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     stride[READ_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     count[READ_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     block[READ_SHARED_FILTERED_CHUNKS_DATASET_DIMS];
     hsize_t     flat_dims[1];
-    size_t      i, j, read_buf_size, correct_buf_size;
-    size_t      num_points;
-    hid_t       file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t       filespace = -1, memspace = -1;
+    size_t      i, read_buf_size, correct_buf_size;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
     int *       recvcounts = NULL;
     int *       displs     = NULL;
 
     if (MAINPROCESS)
-        HDputs("Testing read from filtered chunks with point selection");
-
-    CHECK_CUR_FILTER_AVAIL();
+        HDputs("Testing read from shared filtered chunks");
 
-    dataset_dims[0] = (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_NROWS;
-    dataset_dims[1] = (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_NCOLS;
+    dataset_dims[0] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_NROWS;
+    dataset_dims[1] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_NCOLS;
 
     /* Setup the buffer for writing and for comparison */
     correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
@@ -3795,56 +4153,58 @@ test_read_filtered_dataset_point_selection(void)
 
         VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
 
+        group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+        VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
         /* Create the dataspace for the dataset */
-        filespace = H5Screate_simple(READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
+        filespace = H5Screate_simple(READ_SHARED_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
         VRFY((filespace >= 0), "File dataspace creation succeeded");
 
         /* Create chunked dataset */
-        chunk_dims[0] = (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_CH_NROWS;
-        chunk_dims[1] = (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
+        chunk_dims[0] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_CH_NROWS;
+        chunk_dims[1] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_CH_NCOLS;
 
-        plist_id = H5Pcreate(H5P_DATASET_CREATE);
-        VRFY((plist_id >= 0), "DCPL creation succeeded");
+        plist_id = H5Pcopy(dcpl_id);
+        VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-        VRFY((H5Pset_chunk(plist_id, READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
+        VRFY((H5Pset_chunk(plist_id, READ_SHARED_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
              "Chunk size set");
 
         /* Add test filter to the pipeline */
-        VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+        VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-        dset_id = H5Dcreate2(file_id, READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME,
+        dset_id = H5Dcreate2(group_id, READ_SHARED_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME,
                              filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
         VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
         VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
         VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0),
              "Dataset write succeeded");
 
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
+        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
         VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+        VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
         VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
     }
 
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
-
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
+    dset_id = H5Dopen2(group_id, READ_SHARED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    sel_dims[0] = (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_NROWS / (hsize_t)mpi_size;
-    sel_dims[1] = (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_NCOLS;
+    sel_dims[0] = (hsize_t)DIM0_SCALE_FACTOR;
+    sel_dims[1] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_CH_NCOLS * (hsize_t)DIM1_SCALE_FACTOR;
 
     /* Setup one-dimensional memory dataspace for reading the dataset data into a contiguous buffer */
     flat_dims[0] = sel_dims[0] * sel_dims[1];
@@ -3852,37 +4212,40 @@ test_read_filtered_dataset_point_selection(void)
     memspace = H5Screate_simple(1, flat_dims, NULL);
     VRFY((memspace >= 0), "Memory dataspace creation succeeded");
 
-    /* Set up point selection */
+    /* Select hyperslab in the file */
     filespace = H5Dget_space(dset_id);
     VRFY((filespace >= 0), "File dataspace retrieval succeeded");
 
-    num_points = (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_NROWS *
-                 (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_NCOLS / (hsize_t)mpi_size;
-    coords = (hsize_t *)HDcalloc(1, 2 * num_points * sizeof(*coords));
-    VRFY((NULL != coords), "Coords HDcalloc succeeded");
-
-    for (i = 0; i < num_points; i++)
-        for (j = 0; j < READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS; j++)
-            coords[(i * READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS) + j] =
-                (j > 0) ? (i % (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_NCOLS)
-                        : ((hsize_t)mpi_rank +
-                           ((hsize_t)mpi_size * (i / (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_NCOLS)));
-
-    VRFY((H5Sselect_elements(filespace, H5S_SELECT_SET, (hsize_t)num_points, (const hsize_t *)coords) >= 0),
-         "Point selection succeeded");
+    /*
+     * Each process defines the dataset selection in the file and
+     * reads it to the selection in memory
+     */
+    count[0]  = (hsize_t)READ_SHARED_FILTERED_CHUNKS_NROWS / (hsize_t)READ_SHARED_FILTERED_CHUNKS_CH_NROWS;
+    count[1]  = (hsize_t)READ_SHARED_FILTERED_CHUNKS_NCOLS / (hsize_t)READ_SHARED_FILTERED_CHUNKS_CH_NCOLS;
+    stride[0] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_CH_NROWS;
+    stride[1] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_CH_NCOLS;
+    block[0]  = (hsize_t)READ_SHARED_FILTERED_CHUNKS_CH_NROWS / (hsize_t)mpi_size;
+    block[1]  = (hsize_t)READ_SHARED_FILTERED_CHUNKS_CH_NCOLS;
+    start[0]  = (hsize_t)mpi_rank * block[0];
+    start[1]  = 0;
 
-    /* Create property list for collective dataset read */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
+    if (VERBOSE_MED) {
+        HDprintf("Process %d is reading with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ]\n",
+                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
+        HDfflush(stdout);
+    }
 
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
+    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+         "Hyperslab selection succeeded");
 
     read_buf_size = flat_dims[0] * sizeof(*read_buf);
 
     read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size);
     VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0),
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, read_buf) >= 0),
          "Dataset read succeeded");
 
     global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
@@ -3895,9 +4258,8 @@ test_read_filtered_dataset_point_selection(void)
      * of chunks in the first dimension of the dataset.
      */
     {
-        size_t original_loop_count = dataset_dims[0] / (hsize_t)mpi_size;
-        size_t cur_loop_count      = original_loop_count;
-        size_t total_recvcounts    = 0;
+        size_t loop_count       = count[0];
+        size_t total_recvcounts = 0;
 
         recvcounts = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
         VRFY((NULL != recvcounts), "HDcalloc succeeded");
@@ -3913,12 +4275,11 @@ test_read_filtered_dataset_point_selection(void)
         for (i = 0; i < (size_t)mpi_size; i++)
             displs[i] = (int)(i * dataset_dims[1]);
 
-        for (; cur_loop_count; cur_loop_count--) {
-            VRFY((MPI_SUCCESS ==
-                  MPI_Allgatherv(&read_buf[(original_loop_count - cur_loop_count) * dataset_dims[1]],
-                                 recvcounts[mpi_rank], C_DATATYPE_MPI,
-                                 &global_buf[(original_loop_count - cur_loop_count) * total_recvcounts],
-                                 recvcounts, displs, C_DATATYPE_MPI, comm)),
+        for (; loop_count; loop_count--) {
+            VRFY((MPI_SUCCESS == MPI_Allgatherv(&read_buf[(count[0] - loop_count) * dataset_dims[1]],
+                                                recvcounts[mpi_rank], C_DATATYPE_MPI,
+                                                &global_buf[(count[0] - loop_count) * total_recvcounts],
+                                                recvcounts, displs, C_DATATYPE_MPI, comm)),
                  "MPI_Allgatherv succeeded");
         }
     }
@@ -3936,12 +4297,10 @@ test_read_filtered_dataset_point_selection(void)
     if (correct_buf)
         HDfree(correct_buf);
 
-    HDfree(coords);
-
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
@@ -3949,47 +4308,45 @@ test_read_filtered_dataset_point_selection(void)
 
 /*
  * Tests parallel read of filtered data in the case where
- * each process reads an equal amount of data from each
- * chunk in the dataset. Each chunk is distributed among the
- * processes in round-robin fashion by blocks of size 1 until
- * the whole chunk is selected, leading to an interleaved
- * read pattern.
+ * a single process in the read operation has no selection
+ * in the dataset's dataspace.
  *
  * The MAINPROCESS rank will first write out all of the
- * data to the dataset. Then, each rank will read part
- * of each chunk of the dataset and will contribute its
- * pieces to a global buffer that is checked for consistency.
+ * data to the dataset. Then, each rank (except for one)
+ * reads a part of the dataset and contributes its piece
+ * to a global buffer that is checked for consistency.
  *
  * Programmer: Jordan Henderson
  *             05/15/2018
  */
 static void
-test_read_filtered_dataset_interleaved_read(void)
+test_read_filtered_dataset_single_no_selection(const char *parent_group, H5Z_filter_t filter_id,
+                                               hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id)
 {
     C_DATATYPE *read_buf    = NULL;
     C_DATATYPE *correct_buf = NULL;
     C_DATATYPE *global_buf  = NULL;
-    hsize_t     dataset_dims[INTERLEAVED_READ_FILTERED_DATASET_DIMS];
-    hsize_t     chunk_dims[INTERLEAVED_READ_FILTERED_DATASET_DIMS];
-    hsize_t     sel_dims[INTERLEAVED_READ_FILTERED_DATASET_DIMS];
-    hsize_t     start[INTERLEAVED_READ_FILTERED_DATASET_DIMS];
-    hsize_t     stride[INTERLEAVED_READ_FILTERED_DATASET_DIMS];
-    hsize_t     count[INTERLEAVED_READ_FILTERED_DATASET_DIMS];
-    hsize_t     block[INTERLEAVED_READ_FILTERED_DATASET_DIMS];
+    hsize_t     dataset_dims[READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     chunk_dims[READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     sel_dims[READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     start[READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     stride[READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     count[READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     block[READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
     hsize_t     flat_dims[1];
     size_t      i, read_buf_size, correct_buf_size;
-    hid_t       file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t       filespace = -1, memspace = -1;
+    size_t      segment_length;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
     int *       recvcounts = NULL;
     int *       displs     = NULL;
 
     if (MAINPROCESS)
-        HDputs("Testing interleaved read from filtered chunks");
-
-    CHECK_CUR_FILTER_AVAIL();
+        HDputs("Testing read from filtered chunks with a single process having no selection");
 
-    dataset_dims[0] = (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_NROWS;
-    dataset_dims[1] = (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_NCOLS;
+    dataset_dims[0] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NROWS;
+    dataset_dims[1] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NCOLS;
 
     /* Setup the buffer for writing and for comparison */
     correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
@@ -3998,18 +4355,13 @@ test_read_filtered_dataset_interleaved_read(void)
     VRFY((NULL != correct_buf), "HDcalloc succeeded");
 
     for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
-        /* Add Column Index */
-        correct_buf[i] =
-            (C_DATATYPE)((i % (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_NCOLS)
-
-                         /* Add the Row Index */
-                         + ((i % (hsize_t)(mpi_size * INTERLEAVED_READ_FILTERED_DATASET_NCOLS)) /
-                            (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_NCOLS)
+        correct_buf[i] = (C_DATATYPE)((i % (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])) +
+                                      (i / (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])));
 
-                         /* Add the amount that gets added when a rank moves down to its next section
-                            vertically in the dataset */
-                         + ((hsize_t)INTERLEAVED_READ_FILTERED_DATASET_NCOLS *
-                            (i / (hsize_t)(mpi_size * INTERLEAVED_READ_FILTERED_DATASET_NCOLS))));
+    /* Compute the correct offset into the buffer for the process having no selection and clear it */
+    segment_length = dataset_dims[0] * dataset_dims[1] / (hsize_t)mpi_size;
+    HDmemset(correct_buf + ((size_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NO_SELECT_PROC * segment_length),
+             0, segment_length * sizeof(*correct_buf));
 
     if (MAINPROCESS) {
         plist_id = H5Pcreate(H5P_FILE_ACCESS);
@@ -4023,56 +4375,62 @@ test_read_filtered_dataset_interleaved_read(void)
 
         VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
 
+        group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+        VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
         /* Create the dataspace for the dataset */
-        filespace = H5Screate_simple(INTERLEAVED_READ_FILTERED_DATASET_DIMS, dataset_dims, NULL);
+        filespace =
+            H5Screate_simple(READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
         VRFY((filespace >= 0), "File dataspace creation succeeded");
 
         /* Create chunked dataset */
-        chunk_dims[0] = (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_CH_NROWS;
-        chunk_dims[1] = (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_CH_NCOLS;
+        chunk_dims[0] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS;
+        chunk_dims[1] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
 
-        plist_id = H5Pcreate(H5P_DATASET_CREATE);
-        VRFY((plist_id >= 0), "DCPL creation succeeded");
+        plist_id = H5Pcopy(dcpl_id);
+        VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-        VRFY((H5Pset_chunk(plist_id, INTERLEAVED_READ_FILTERED_DATASET_DIMS, chunk_dims) >= 0),
+        VRFY((H5Pset_chunk(plist_id, READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
              "Chunk size set");
 
         /* Add test filter to the pipeline */
-        VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+        VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-        dset_id = H5Dcreate2(file_id, INTERLEAVED_READ_FILTERED_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
-                             H5P_DEFAULT, plist_id, H5P_DEFAULT);
+        dset_id = H5Dcreate2(group_id, READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME,
+                             HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
         VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
         VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
         VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0),
              "Dataset write succeeded");
 
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
+        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
         VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+        VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
         VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
     }
 
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
-
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" INTERLEAVED_READ_FILTERED_DATASET_NAME, H5P_DEFAULT);
+    dset_id = H5Dopen2(group_id, READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    sel_dims[0] = (hsize_t)(INTERLEAVED_READ_FILTERED_DATASET_NROWS / mpi_size);
-    sel_dims[1] = (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_NCOLS;
+    sel_dims[0] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS;
+    sel_dims[1] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NCOLS;
+
+    if (mpi_rank == READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NO_SELECT_PROC)
+        sel_dims[0] = sel_dims[1] = 0;
 
     /* Setup one-dimensional memory dataspace for reading the dataset data into a contiguous buffer */
     flat_dims[0] = sel_dims[0] * sel_dims[1];
@@ -4088,15 +4446,14 @@ test_read_filtered_dataset_interleaved_read(void)
      * Each process defines the dataset selection in the file and
      * reads it to the selection in memory
      */
-    count[0] =
-        (hsize_t)(INTERLEAVED_READ_FILTERED_DATASET_NROWS / INTERLEAVED_READ_FILTERED_DATASET_CH_NROWS);
-    count[1] =
-        (hsize_t)(INTERLEAVED_READ_FILTERED_DATASET_NCOLS / INTERLEAVED_READ_FILTERED_DATASET_CH_NCOLS);
-    stride[0] = (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_CH_NROWS;
-    stride[1] = (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_CH_NCOLS;
-    block[0]  = 1;
-    block[1]  = (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_CH_NCOLS;
-    start[0]  = (hsize_t)mpi_rank;
+    count[0] = 1;
+    count[1] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NCOLS /
+               (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
+    stride[0] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS;
+    stride[1] = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
+    block[0]  = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS;
+    block[1]  = (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
+    start[0]  = (hsize_t)mpi_rank * (hsize_t)READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS * count[0];
     start[1]  = 0;
 
     if (VERBOSE_MED) {
@@ -4107,58 +4464,53 @@ test_read_filtered_dataset_interleaved_read(void)
         HDfflush(stdout);
     }
 
-    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
-         "Hyperslab selection succeeded");
-
-    /* Create property list for collective dataset read */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
-
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
+    if (mpi_rank == READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NO_SELECT_PROC)
+        VRFY((H5Sselect_none(filespace) >= 0), "Select none succeeded");
+    else
+        VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+             "Hyperslab selection succeeded");
 
     read_buf_size = flat_dims[0] * sizeof(*read_buf);
 
     read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size);
     VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0),
-         "Dataset read succeeded");
+    if (mpi_rank == READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NO_SELECT_PROC) {
+        VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, NULL) >= 0),
+             "Dataset read succeeded");
+    }
+    else {
+        VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, read_buf) >= 0),
+             "Dataset read succeeded");
+    }
 
     global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != global_buf), "HDcalloc succeeded");
 
-    /*
-     * Since these chunks are shared, run multiple rounds of MPI_Allgatherv
-     * to collect all of the pieces into their appropriate locations. The
-     * number of times MPI_Allgatherv is run should be equal to the number
-     * of chunks in the first dimension of the dataset.
-     */
-    {
-        size_t loop_count       = count[0];
-        size_t total_recvcounts = 0;
-
-        recvcounts = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
-        VRFY((NULL != recvcounts), "HDcalloc succeeded");
+    /* Collect each piece of data from all ranks into a global buffer on all ranks */
+    recvcounts = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
+    VRFY((NULL != recvcounts), "HDcalloc succeeded");
 
-        displs = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
-        VRFY((NULL != displs), "HDcalloc succeeded");
+    for (i = 0; i < (size_t)mpi_size; i++)
+        recvcounts[i] = (int)(READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS *
+                              READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NCOLS);
+    recvcounts[READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NO_SELECT_PROC] = 0;
 
-        for (i = 0; i < (size_t)mpi_size; i++) {
-            recvcounts[i] = (int)dataset_dims[1];
-            total_recvcounts += (size_t)recvcounts[i];
-        }
+    displs = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
+    VRFY((NULL != displs), "HDcalloc succeeded");
 
-        for (i = 0; i < (size_t)mpi_size; i++)
-            displs[i] = (int)(i * dataset_dims[1]);
+    for (i = 0; i < (size_t)mpi_size; i++)
+        displs[i] = (int)(i * (size_t)(READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS *
+                                       READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NCOLS));
 
-        for (; loop_count; loop_count--) {
-            VRFY((MPI_SUCCESS == MPI_Allgatherv(&read_buf[(count[0] - loop_count) * dataset_dims[1]],
-                                                recvcounts[mpi_rank], C_DATATYPE_MPI,
-                                                &global_buf[(count[0] - loop_count) * total_recvcounts],
-                                                recvcounts, displs, C_DATATYPE_MPI, comm)),
-                 "MPI_Allgatherv succeeded");
-        }
-    }
+    if (mpi_rank == READ_SINGLE_NO_SELECTION_FILTERED_CHUNKS_NO_SELECT_PROC)
+        VRFY((MPI_SUCCESS == MPI_Allgatherv(read_buf, 0, C_DATATYPE_MPI, global_buf, recvcounts, displs,
+                                            C_DATATYPE_MPI, comm)),
+             "MPI_Allgatherv succeeded");
+    else
+        VRFY((MPI_SUCCESS == MPI_Allgatherv(read_buf, (int)flat_dims[0], C_DATATYPE_MPI, global_buf,
+                                            recvcounts, displs, C_DATATYPE_MPI, comm)),
+             "MPI_Allgatherv succeeded");
 
     VRFY((0 == HDmemcmp(global_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
 
@@ -4176,7 +4528,7 @@ test_read_filtered_dataset_interleaved_read(void)
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
@@ -4184,55 +4536,44 @@ test_read_filtered_dataset_interleaved_read(void)
 
 /*
  * Tests parallel read of filtered data in the case where
- * the dataset has 3 dimensions and each process reads from
- * its own "page" in the 3rd dimension.
+ * no process in the read operation has a selection in the
+ * dataset's dataspace. This test is to ensure that there
+ * are no assertion failures or similar issues due to size
+ * 0 allocations and the like.
  *
  * The MAINPROCESS rank will first write out all of the
- * data to the dataset. Then, each rank reads its own "page"
- * of the dataset and contributes its piece to a global buffer
- * that is checked for consistency.
+ * data to the dataset. Then, each rank will simply issue
+ * a no-op read.
  *
  * Programmer: Jordan Henderson
- *             05/16/2018
+ *             05/15/2018
  */
 static void
-test_read_3d_filtered_dataset_no_overlap_separate_pages(void)
+test_read_filtered_dataset_all_no_selection(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                            hid_t dcpl_id, hid_t dxpl_id)
 {
-    MPI_Datatype vector_type;
-    MPI_Datatype resized_vector_type;
-    C_DATATYPE * read_buf    = NULL;
-    C_DATATYPE * correct_buf = NULL;
-    C_DATATYPE * global_buf  = NULL;
-    hsize_t      dataset_dims[READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
-    hsize_t      chunk_dims[READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
-    hsize_t      sel_dims[READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
-    hsize_t      start[READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
-    hsize_t      stride[READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
-    hsize_t      count[READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
-    hsize_t      block[READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
-    hsize_t      flat_dims[1];
-    size_t       i, read_buf_size, correct_buf_size;
-    hid_t        file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t        filespace = -1, memspace = -1;
+    C_DATATYPE *read_buf    = NULL;
+    C_DATATYPE *correct_buf = NULL;
+    hsize_t     dataset_dims[READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     chunk_dims[READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     sel_dims[READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    size_t      read_buf_size, correct_buf_size;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
-        HDputs("Testing read from unshared filtered chunks on separate pages in 3D dataset");
-
-    CHECK_CUR_FILTER_AVAIL();
+        HDputs("Testing read from filtered chunks with all processes having no selection");
 
-    dataset_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NROWS;
-    dataset_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NCOLS;
-    dataset_dims[2] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DEPTH;
+    dataset_dims[0] = (hsize_t)READ_ALL_NO_SELECTION_FILTERED_CHUNKS_NROWS;
+    dataset_dims[1] = (hsize_t)READ_ALL_NO_SELECTION_FILTERED_CHUNKS_NCOLS;
 
     /* Setup the buffer for writing and for comparison */
-    correct_buf_size = dataset_dims[0] * dataset_dims[1] * dataset_dims[2] * sizeof(*correct_buf);
+    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
 
     correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != correct_buf), "HDcalloc succeeded");
 
-    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
-        correct_buf[i] = (C_DATATYPE)((i % (hsize_t)mpi_size) + (i / (hsize_t)mpi_size));
-
     if (MAINPROCESS) {
         plist_id = H5Pcreate(H5P_FILE_ACCESS);
         VRFY((plist_id >= 0), "FAPL creation succeeded");
@@ -4245,145 +4586,77 @@ test_read_3d_filtered_dataset_no_overlap_separate_pages(void)
 
         VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
 
+        group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+        VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
         /* Create the dataspace for the dataset */
-        filespace =
-            H5Screate_simple(READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS, dataset_dims, NULL);
+        filespace = H5Screate_simple(READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
         VRFY((filespace >= 0), "File dataspace creation succeeded");
 
         /* Create chunked dataset */
-        chunk_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NROWS;
-        chunk_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NCOLS;
-        chunk_dims[2] = 1;
+        chunk_dims[0] = (hsize_t)READ_ALL_NO_SELECTION_FILTERED_CHUNKS_CH_NROWS;
+        chunk_dims[1] = (hsize_t)READ_ALL_NO_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
 
-        plist_id = H5Pcreate(H5P_DATASET_CREATE);
-        VRFY((plist_id >= 0), "DCPL creation succeeded");
+        plist_id = H5Pcopy(dcpl_id);
+        VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-        VRFY(
-            (H5Pset_chunk(plist_id, READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS, chunk_dims) >= 0),
-            "Chunk size set");
+        VRFY((H5Pset_chunk(plist_id, READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
+             "Chunk size set");
 
         /* Add test filter to the pipeline */
-        VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+        VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-        dset_id = H5Dcreate2(file_id, READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_NAME,
-                             HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+        dset_id = H5Dcreate2(group_id, READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME,
+                             filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
         VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
         VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
         VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0),
              "Dataset write succeeded");
 
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
+        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
         VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+        VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
         VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
     }
 
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
-
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_NAME, H5P_DEFAULT);
+    dset_id = H5Dopen2(group_id, READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    sel_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NROWS;
-    sel_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NCOLS;
-    sel_dims[2] = 1;
-
-    /* Setup one-dimensional memory dataspace for reading the dataset data into a contiguous buffer */
-    flat_dims[0] = sel_dims[0] * sel_dims[1] * sel_dims[2];
+    sel_dims[0] = sel_dims[1] = 0;
 
-    memspace = H5Screate_simple(1, flat_dims, NULL);
+    memspace = H5Screate_simple(READ_ALL_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, sel_dims, NULL);
     VRFY((memspace >= 0), "Memory dataspace creation succeeded");
 
-    /* Select hyperslab in the file */
+    /* Retrieve the file dataspace; no elements are selected in it below */
     filespace = H5Dget_space(dset_id);
     VRFY((filespace >= 0), "File dataspace retrieval succeeded");
 
-    /*
-     * Each process defines the dataset selection in the file and
-     * reads it to the selection in memory
-     */
-    count[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NROWS /
-               (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NROWS;
-    count[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NCOLS /
-               (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NCOLS;
-    count[2]  = 1;
-    stride[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NROWS;
-    stride[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NCOLS;
-    stride[2] = 1;
-    block[0]  = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NROWS;
-    block[1]  = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NCOLS;
-    block[2]  = 1;
-    start[0]  = 0;
-    start[1]  = 0;
-    start[2]  = (hsize_t)mpi_rank;
-
-    if (VERBOSE_MED) {
-        HDprintf("Process %d is reading with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
-                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
-                 ", %" PRIuHSIZE " ]\n",
-                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
-        HDfflush(stdout);
-    }
-
-    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
-         "Hyperslab selection succeeded");
-
-    /* Create property list for collective dataset read */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
-
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
+    VRFY((H5Sselect_none(filespace) >= 0), "Select none succeeded");
 
-    read_buf_size = flat_dims[0] * sizeof(*read_buf);
+    read_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*read_buf);
 
     read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size);
     VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0),
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, read_buf) >= 0),
          "Dataset read succeeded");
 
-    global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != global_buf), "HDcalloc succeeded");
-
-    /*
-     * Due to the nature of 3-dimensional reading, create an MPI vector type that allows each
-     * rank to write to the nth position of the global data buffer, where n is the rank number.
-     */
-    VRFY((MPI_SUCCESS == MPI_Type_vector((int)flat_dims[0], 1, mpi_size, C_DATATYPE_MPI, &vector_type)),
-         "MPI_Type_vector succeeded");
-    VRFY((MPI_SUCCESS == MPI_Type_commit(&vector_type)), "MPI_Type_commit succeeded");
-
-    /*
-     * Resize the type to allow interleaving,
-     * so make it only one MPI_LONG wide
-     */
-    VRFY((MPI_SUCCESS == MPI_Type_create_resized(vector_type, 0, sizeof(long), &resized_vector_type)),
-         "MPI_Type_create_resized");
-    VRFY((MPI_SUCCESS == MPI_Type_commit(&resized_vector_type)), "MPI_Type_commit succeeded");
-
-    VRFY((MPI_SUCCESS == MPI_Allgather(read_buf, (int)flat_dims[0], C_DATATYPE_MPI, global_buf, 1,
-                                       resized_vector_type, comm)),
-         "MPI_Allgather succeeded");
-
-    VRFY((0 == HDmemcmp(global_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
-
-    VRFY((MPI_SUCCESS == MPI_Type_free(&vector_type)), "MPI_Type_free succeeded");
-    VRFY((MPI_SUCCESS == MPI_Type_free(&resized_vector_type)), "MPI_Type_free succeeded");
+    VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
 
-    if (global_buf)
-        HDfree(global_buf);
     if (read_buf)
         HDfree(read_buf);
     if (correct_buf)
@@ -4392,70 +4665,61 @@ test_read_3d_filtered_dataset_no_overlap_separate_pages(void)
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
 
 /*
- * Tests parallel read of transformed and filtered data in the
- * case where only one process is reading from a particular
- * chunk in the operation. Normally, a data transform function
- * will cause the parallel library to break to independent I/O
- * and this isn't allowed when there are filters in the pipeline.
- * However, in this case the parallel library recognizes that
- * the used data transform function "x" is the same as not
- * applying the transform function. Therefore it does not apply
- * the transform function resulting in not breaking to
- * independent I/O.
+ * Tests parallel read of filtered data by using point
+ * selections instead of hyperslab selections.
  *
  * The MAINPROCESS rank will first write out all of the
- * data to the dataset. Then, each rank reads a part of
- * the dataset and contributes its piece to a global buffer
- * that is checked for consistency.
+ * data to the dataset. Then, each rank will read part
+ * of the dataset using a point selection and will
+ * contribute its piece to a global buffer that is
+ * checked for consistency.
  *
- * Programmer: Jan-Willem Blokland
- *             08/20/2021
+ * Programmer: Jordan Henderson
+ *             05/15/2018
  */
 static void
-test_read_transformed_filtered_dataset_no_overlap(void)
+test_read_filtered_dataset_point_selection(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                           hid_t dcpl_id, hid_t dxpl_id)
 {
-    C_DATATYPE *read_buf    = NULL;
     C_DATATYPE *correct_buf = NULL;
+    C_DATATYPE *read_buf    = NULL;
     C_DATATYPE *global_buf  = NULL;
-    hsize_t     dataset_dims[READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     chunk_dims[READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     sel_dims[READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     start[READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     stride[READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     count[READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
-    hsize_t     block[READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t *   coords      = NULL;
+    hsize_t     dataset_dims[READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     chunk_dims[READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     sel_dims[READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS];
     hsize_t     flat_dims[1];
-    size_t      i, read_buf_size, correct_buf_size;
-    hid_t       file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t       filespace = -1, memspace = -1;
+    size_t      i, j, read_buf_size, correct_buf_size;
+    size_t      num_points;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
     int *       recvcounts = NULL;
     int *       displs     = NULL;
 
     if (MAINPROCESS)
-        HDputs("Testing read from unshared transformed and filtered chunks");
-
-    CHECK_CUR_FILTER_AVAIL();
+        HDputs("Testing read from filtered chunks with point selection");
 
-    dataset_dims[0] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NROWS;
-    dataset_dims[1] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NCOLS;
+    dataset_dims[0] = (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_NROWS;
+    dataset_dims[1] = (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_NCOLS;
 
     /* Setup the buffer for writing and for comparison */
-    correct_buf_size = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NROWS *
-                       (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NCOLS * sizeof(*correct_buf);
+    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
 
     correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != correct_buf), "HDcalloc succeeded");
 
     for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
-        correct_buf[i] = (C_DATATYPE)((i % (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])) +
-                                      (i / (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])));
+        correct_buf[i] = (C_DATATYPE)(
+            (dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
+            (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
 
     if (MAINPROCESS) {
         plist_id = H5Pcreate(H5P_FILE_ACCESS);
@@ -4469,66 +4733,58 @@ test_read_transformed_filtered_dataset_no_overlap(void)
 
         VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
 
+        group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+        VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
         /* Create the dataspace for the dataset */
-        filespace =
-            H5Screate_simple(READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
+        filespace = H5Screate_simple(READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
         VRFY((filespace >= 0), "File dataspace creation succeeded");
 
         /* Create chunked dataset */
-        chunk_dims[0] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS;
-        chunk_dims[1] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NCOLS;
+        chunk_dims[0] = (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_CH_NROWS;
+        chunk_dims[1] = (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_CH_NCOLS;
 
-        plist_id = H5Pcreate(H5P_DATASET_CREATE);
-        VRFY((plist_id >= 0), "DCPL creation succeeded");
+        plist_id = H5Pcopy(dcpl_id);
+        VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-        VRFY(
-            (H5Pset_chunk(plist_id, READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
-            "Chunk size set");
+        VRFY((H5Pset_chunk(plist_id, READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
+             "Chunk size set");
 
         /* Add test filter to the pipeline */
-        VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+        VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-        dset_id = H5Dcreate2(file_id, READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_NAME,
-                             HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+        dset_id = H5Dcreate2(group_id, READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME,
+                             filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
         VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
-        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
-
-        /* Create property list for collective dataset read */
-        plist_id = H5Pcreate(H5P_DATASET_XFER);
-        VRFY((plist_id >= 0), "DXPL creation succeeded");
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
 
-        /* Set data transform expression */
-        VRFY((H5Pset_data_transform(plist_id, "x") >= 0), "Set data transform expression succeeded");
+        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
-        VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, correct_buf) >= 0),
+        VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0),
              "Dataset write succeeded");
 
-        VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
+        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
         VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+        VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
         VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
     }
 
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
-
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
+    dset_id = H5Dopen2(group_id, READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    sel_dims[0] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS;
-    sel_dims[1] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NCOLS;
+    sel_dims[0] = (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_NROWS / (hsize_t)mpi_size;
+    sel_dims[1] = (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_NCOLS;
 
     /* Setup one-dimensional memory dataspace for reading the dataset data into a contiguous buffer */
     flat_dims[0] = sel_dims[0] * sel_dims[1];
@@ -4536,71 +4792,70 @@ test_read_transformed_filtered_dataset_no_overlap(void)
     memspace = H5Screate_simple(1, flat_dims, NULL);
     VRFY((memspace >= 0), "Memory dataspace creation succeeded");
 
-    /* Select hyperslab in the file */
+    /* Set up point selection */
     filespace = H5Dget_space(dset_id);
     VRFY((filespace >= 0), "File dataspace retrieval succeeded");
 
-    /*
-     * Each process defines the dataset selection in the file and reads
-     * it to the selection in memory
-     */
-    count[0] = 1;
-    count[1] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NCOLS /
-               (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NCOLS;
-    stride[0] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS;
-    stride[1] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NCOLS;
-    block[0]  = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS;
-    block[1]  = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NCOLS;
-    start[0]  = ((hsize_t)mpi_rank * (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS * count[0]);
-    start[1]  = 0;
-
-    if (VERBOSE_MED) {
-        HDprintf("Process %d is reading with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
-                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
-                 ", %" PRIuHSIZE " ]\n",
-                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
-        HDfflush(stdout);
-    }
-
-    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
-         "Hyperslab selection succeeded");
-
-    /* Create property list for collective dataset read and data transform */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
+    num_points = (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_NROWS *
+                 (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_NCOLS / (hsize_t)mpi_size;
+    coords = (hsize_t *)HDcalloc(1, 2 * num_points * sizeof(*coords));
+    VRFY((NULL != coords), "Coords HDcalloc succeeded");
 
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
+    for (i = 0; i < num_points; i++)
+        for (j = 0; j < READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS; j++)
+            coords[(i * READ_POINT_SELECTION_FILTERED_CHUNKS_DATASET_DIMS) + j] =
+                (j > 0) ? (i % (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_NCOLS)
+                        : ((hsize_t)mpi_rank +
+                           ((hsize_t)mpi_size * (i / (hsize_t)READ_POINT_SELECTION_FILTERED_CHUNKS_NCOLS)));
 
-    /* Set data transform expression */
-    VRFY((H5Pset_data_transform(plist_id, "x") >= 0), "Set data transform expression succeeded");
+    VRFY((H5Sselect_elements(filespace, H5S_SELECT_SET, (hsize_t)num_points, (const hsize_t *)coords) >= 0),
+         "Point selection succeeded");
 
     read_buf_size = flat_dims[0] * sizeof(*read_buf);
 
     read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size);
     VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0),
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, read_buf) >= 0),
          "Dataset read succeeded");
 
     global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != global_buf), "HDcalloc succeeded");
 
-    /* Collect each piece of data from all ranks into a global buffer on all ranks */
-    recvcounts = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
-    VRFY((NULL != recvcounts), "HDcalloc succeeded");
+    /*
+     * Since each rank read every mpi_size-th row of the dataset, run
+     * multiple rounds of MPI_Allgatherv to interleave the rows back into
+     * their appropriate locations. Each round gathers one row from every
+     * rank, so the number of rounds is dataset_dims[0] / mpi_size.
+     */
+    {
+        size_t original_loop_count = dataset_dims[0] / (hsize_t)mpi_size;
+        size_t cur_loop_count      = original_loop_count;
+        size_t total_recvcounts    = 0;
 
-    for (i = 0; i < (size_t)mpi_size; i++)
-        recvcounts[i] = (int)flat_dims[0];
+        recvcounts = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
+        VRFY((NULL != recvcounts), "HDcalloc succeeded");
 
-    displs = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
-    VRFY((NULL != displs), "HDcalloc succeeded");
+        displs = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
+        VRFY((NULL != displs), "HDcalloc succeeded");
 
-    for (i = 0; i < (size_t)mpi_size; i++)
-        displs[i] = (int)(i * flat_dims[0]);
+        for (i = 0; i < (size_t)mpi_size; i++) {
+            recvcounts[i] = (int)dataset_dims[1];
+            total_recvcounts += (size_t)recvcounts[i];
+        }
 
-    VRFY((MPI_SUCCESS == MPI_Allgatherv(read_buf, (int)flat_dims[0], C_DATATYPE_MPI, global_buf, recvcounts,
-                                        displs, C_DATATYPE_MPI, comm)),
-         "MPI_Allgatherv succeeded");
+        for (i = 0; i < (size_t)mpi_size; i++)
+            displs[i] = (int)(i * dataset_dims[1]);
+
+        for (; cur_loop_count; cur_loop_count--) {
+            VRFY((MPI_SUCCESS ==
+                  MPI_Allgatherv(&read_buf[(original_loop_count - cur_loop_count) * dataset_dims[1]],
+                                 recvcounts[mpi_rank], C_DATATYPE_MPI,
+                                 &global_buf[(original_loop_count - cur_loop_count) * total_recvcounts],
+                                 recvcounts, displs, C_DATATYPE_MPI, comm)),
+                 "MPI_Allgatherv succeeded");
+        }
+    }
 
     VRFY((0 == HDmemcmp(global_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
 
@@ -4615,10 +4870,12 @@ test_read_transformed_filtered_dataset_no_overlap(void)
     if (correct_buf)
         HDfree(correct_buf);
 
+    HDfree(coords);
+
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
@@ -4626,56 +4883,67 @@ test_read_transformed_filtered_dataset_no_overlap(void)
 
 /*
  * Tests parallel read of filtered data in the case where
- * the dataset has 3 dimensions and each process reads from
- * each "page" in the 3rd dimension. However, no chunk on a
- * given "page" is read from by more than one process.
+ * each process reads an equal amount of data from each
+ * chunk in the dataset. Each chunk is distributed among the
+ * processes in round-robin fashion by blocks of size 1 until
+ * the whole chunk is selected, leading to an interleaved
+ * read pattern.
  *
  * The MAINPROCESS rank will first write out all of the
- * data to the dataset. Then, each rank reads a part of
- * each "page" of the dataset and contributes its piece to a
- * global buffer that is checked for consistency.
+ * data to the dataset. Then, each rank will read part
+ * of each chunk of the dataset and will contribute its
+ * pieces to a global buffer that is checked for consistency.
  *
  * Programmer: Jordan Henderson
- *             05/16/2018
+ *             05/15/2018
  */
 static void
-test_read_3d_filtered_dataset_no_overlap_same_pages(void)
+test_read_filtered_dataset_interleaved_read(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                            hid_t dcpl_id, hid_t dxpl_id)
 {
     C_DATATYPE *read_buf    = NULL;
     C_DATATYPE *correct_buf = NULL;
     C_DATATYPE *global_buf  = NULL;
-    hsize_t     dataset_dims[READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
-    hsize_t     chunk_dims[READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
-    hsize_t     sel_dims[READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
-    hsize_t     start[READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
-    hsize_t     stride[READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
-    hsize_t     count[READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
-    hsize_t     block[READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
-    hsize_t     flat_dims[1];
-    size_t      i, read_buf_size, correct_buf_size;
-    hid_t       file_id, dset_id, plist_id;
-    hid_t       filespace, memspace;
-    int *       recvcounts = NULL;
-    int *       displs     = NULL;
-
-    if (MAINPROCESS)
-        HDputs("Testing read from unshared filtered chunks on the same pages in 3D dataset");
+    hsize_t     dataset_dims[INTERLEAVED_READ_FILTERED_DATASET_DIMS];
+    hsize_t     chunk_dims[INTERLEAVED_READ_FILTERED_DATASET_DIMS];
+    hsize_t     sel_dims[INTERLEAVED_READ_FILTERED_DATASET_DIMS];
+    hsize_t     start[INTERLEAVED_READ_FILTERED_DATASET_DIMS];
+    hsize_t     stride[INTERLEAVED_READ_FILTERED_DATASET_DIMS];
+    hsize_t     count[INTERLEAVED_READ_FILTERED_DATASET_DIMS];
+    hsize_t     block[INTERLEAVED_READ_FILTERED_DATASET_DIMS];
+    hsize_t     flat_dims[1];
+    size_t      i, read_buf_size, correct_buf_size;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
+    int *       recvcounts = NULL;
+    int *       displs     = NULL;
 
-    CHECK_CUR_FILTER_AVAIL();
+    if (MAINPROCESS)
+        HDputs("Testing interleaved read from filtered chunks");
 
-    dataset_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_NROWS;
-    dataset_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_NCOLS;
-    dataset_dims[2] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DEPTH;
+    dataset_dims[0] = (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_NROWS;
+    dataset_dims[1] = (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_NCOLS;
 
     /* Setup the buffer for writing and for comparison */
-    correct_buf_size = dataset_dims[0] * dataset_dims[1] * dataset_dims[2] * sizeof(*correct_buf);
+    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
 
     correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != correct_buf), "HDcalloc succeeded");
 
     for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
-        correct_buf[i] = (C_DATATYPE)((i % (dataset_dims[0] * dataset_dims[1])) +
-                                      (i / (dataset_dims[0] * dataset_dims[1])));
+        /* Add the Column Index */
+        correct_buf[i] =
+            (C_DATATYPE)((i % (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_NCOLS)
+
+                         /* Add the Row Index */
+                         + ((i % (hsize_t)(mpi_size * INTERLEAVED_READ_FILTERED_DATASET_NCOLS)) /
+                            (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_NCOLS)
+
+                         /* Add the amount that gets added when a rank moves down to its next section
+                            vertically in the dataset */
+                         + ((hsize_t)INTERLEAVED_READ_FILTERED_DATASET_NCOLS *
+                            (i / (hsize_t)(mpi_size * INTERLEAVED_READ_FILTERED_DATASET_NCOLS))));
 
     if (MAINPROCESS) {
         plist_id = H5Pcreate(H5P_FILE_ACCESS);
@@ -4689,63 +4957,61 @@ test_read_3d_filtered_dataset_no_overlap_same_pages(void)
 
         VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
 
+        group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+        VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
         /* Create the dataspace for the dataset */
-        filespace =
-            H5Screate_simple(READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS, dataset_dims, NULL);
+        filespace = H5Screate_simple(INTERLEAVED_READ_FILTERED_DATASET_DIMS, dataset_dims, NULL);
         VRFY((filespace >= 0), "File dataspace creation succeeded");
 
         /* Create chunked dataset */
-        chunk_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NROWS;
-        chunk_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NCOLS;
-        chunk_dims[2] = 1;
+        chunk_dims[0] = (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_CH_NROWS;
+        chunk_dims[1] = (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_CH_NCOLS;
 
-        plist_id = H5Pcreate(H5P_DATASET_CREATE);
-        VRFY((plist_id >= 0), "DCPL creation succeeded");
+        plist_id = H5Pcopy(dcpl_id);
+        VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-        VRFY((H5Pset_chunk(plist_id, READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS, chunk_dims) >=
-              0),
+        VRFY((H5Pset_chunk(plist_id, INTERLEAVED_READ_FILTERED_DATASET_DIMS, chunk_dims) >= 0),
              "Chunk size set");
 
         /* Add test filter to the pipeline */
-        VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+        VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-        dset_id = H5Dcreate2(file_id, READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_NAME,
-                             HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+        dset_id = H5Dcreate2(group_id, INTERLEAVED_READ_FILTERED_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
+                             H5P_DEFAULT, plist_id, H5P_DEFAULT);
         VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
         VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
         VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0),
              "Dataset write succeeded");
 
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
+        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
         VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+        VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
         VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
     }
 
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
-
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_NAME, H5P_DEFAULT);
+    dset_id = H5Dopen2(group_id, INTERLEAVED_READ_FILTERED_DATASET_NAME, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    sel_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NROWS;
-    sel_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_NCOLS;
-    sel_dims[2] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DEPTH;
+    sel_dims[0] = (hsize_t)(INTERLEAVED_READ_FILTERED_DATASET_NROWS / mpi_size);
+    sel_dims[1] = (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_NCOLS;
 
     /* Setup one-dimensional memory dataspace for reading the dataset data into a contiguous buffer */
-    flat_dims[0] = sel_dims[0] * sel_dims[1] * sel_dims[2];
+    flat_dims[0] = sel_dims[0] * sel_dims[1];
 
     memspace = H5Screate_simple(1, flat_dims, NULL);
     VRFY((memspace >= 0), "Memory dataspace creation succeeded");
@@ -4758,19 +5024,16 @@ test_read_3d_filtered_dataset_no_overlap_same_pages(void)
      * Each process defines the dataset selection in the file and
      * reads it to the selection in memory
      */
-    count[0] = 1;
-    count[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_NCOLS /
-               (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NCOLS;
-    count[2]  = (hsize_t)mpi_size;
-    stride[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NROWS;
-    stride[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NCOLS;
-    stride[2] = 1;
-    block[0]  = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NROWS;
-    block[1]  = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NCOLS;
-    block[2]  = 1;
-    start[0]  = ((hsize_t)mpi_rank * (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NROWS * count[0]);
+    count[0] =
+        (hsize_t)(INTERLEAVED_READ_FILTERED_DATASET_NROWS / INTERLEAVED_READ_FILTERED_DATASET_CH_NROWS);
+    count[1] =
+        (hsize_t)(INTERLEAVED_READ_FILTERED_DATASET_NCOLS / INTERLEAVED_READ_FILTERED_DATASET_CH_NCOLS);
+    stride[0] = (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_CH_NROWS;
+    stride[1] = (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_CH_NCOLS;
+    block[0]  = 1;
+    block[1]  = (hsize_t)INTERLEAVED_READ_FILTERED_DATASET_CH_NCOLS;
+    start[0]  = (hsize_t)mpi_rank;
     start[1]  = 0;
-    start[2]  = 0;
 
     if (VERBOSE_MED) {
         HDprintf("Process %d is reading with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
@@ -4783,39 +5046,49 @@ test_read_3d_filtered_dataset_no_overlap_same_pages(void)
     VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
          "Hyperslab selection succeeded");
 
-    /* Create property list for collective dataset read */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
-
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
-
     read_buf_size = flat_dims[0] * sizeof(*read_buf);
 
     read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size);
     VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0),
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, read_buf) >= 0),
          "Dataset read succeeded");
 
     global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != global_buf), "HDcalloc succeeded");
 
-    /* Collect each piece of data from all ranks into a global buffer on all ranks */
-    recvcounts = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
-    VRFY((NULL != recvcounts), "HDcalloc succeeded");
+    /*
+     * Since these chunks are shared, run multiple rounds of MPI_Allgatherv
+     * to collect all of the pieces into their appropriate locations. The
+     * number of times MPI_Allgatherv is run should be equal to the number
+     * of chunks in the first dimension of the dataset.
+     */
+    {
+        size_t loop_count       = (size_t)count[0]; /* count[] is hsize_t; narrow explicitly */
+        size_t total_recvcounts = 0;
 
-    for (i = 0; i < (size_t)mpi_size; i++)
-        recvcounts[i] = (int)flat_dims[0];
+        recvcounts = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
+        VRFY((NULL != recvcounts), "HDcalloc succeeded");
 
-    displs = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
-    VRFY((NULL != displs), "HDcalloc succeeded");
+        displs = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
+        VRFY((NULL != displs), "HDcalloc succeeded");
 
-    for (i = 0; i < (size_t)mpi_size; i++)
-        displs[i] = (int)(i * flat_dims[0]);
+        for (i = 0; i < (size_t)mpi_size; i++) {
+            recvcounts[i] = (int)dataset_dims[1];
+            total_recvcounts += (size_t)recvcounts[i];
+        }
 
-    VRFY((MPI_SUCCESS == MPI_Allgatherv(read_buf, (int)flat_dims[0], C_DATATYPE_MPI, global_buf, recvcounts,
-                                        displs, C_DATATYPE_MPI, comm)),
-         "MPI_Allgatherv succeeded");
+        for (i = 0; i < (size_t)mpi_size; i++)
+            displs[i] = (int)(i * dataset_dims[1]);
+
+        for (; loop_count; loop_count--) {
+            VRFY((MPI_SUCCESS == MPI_Allgatherv(&read_buf[(count[0] - loop_count) * dataset_dims[1]],
+                                                recvcounts[mpi_rank], C_DATATYPE_MPI,
+                                                &global_buf[(count[0] - loop_count) * total_recvcounts],
+                                                recvcounts, displs, C_DATATYPE_MPI, comm)),
+                 "MPI_Allgatherv succeeded");
+        }
+    }
 
     VRFY((0 == HDmemcmp(global_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
 
@@ -4833,7 +5106,7 @@ test_read_3d_filtered_dataset_no_overlap_same_pages(void)
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
@@ -4842,45 +5115,44 @@ test_read_3d_filtered_dataset_no_overlap_same_pages(void)
 /*
  * Tests parallel read of filtered data in the case where
  * the dataset has 3 dimensions and each process reads from
- * each "page" in the 3rd dimension. Further, each chunk in
- * each "page" is read from equally by all processes.
+ * its own "page" in the 3rd dimension.
  *
  * The MAINPROCESS rank will first write out all of the
- * data to the dataset. Then, each rank reads part of each
- * chunk of each "page" and contributes its pieces to a
- * global buffer that is checked for consistency.
+ * data to the dataset. Then, each rank reads its own "page"
+ * of the dataset and contributes its piece to a global buffer
+ * that is checked for consistency.
  *
  * Programmer: Jordan Henderson
  *             05/16/2018
  */
 static void
-test_read_3d_filtered_dataset_overlap(void)
+test_read_3d_filtered_dataset_no_overlap_separate_pages(const char *parent_group, H5Z_filter_t filter_id,
+                                                        hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id)
 {
     MPI_Datatype vector_type;
     MPI_Datatype resized_vector_type;
     C_DATATYPE * read_buf    = NULL;
     C_DATATYPE * correct_buf = NULL;
     C_DATATYPE * global_buf  = NULL;
-    hsize_t      dataset_dims[READ_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
-    hsize_t      chunk_dims[READ_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
-    hsize_t      sel_dims[READ_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
-    hsize_t      start[READ_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
-    hsize_t      stride[READ_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
-    hsize_t      count[READ_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
-    hsize_t      block[READ_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
+    hsize_t      dataset_dims[READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
+    hsize_t      chunk_dims[READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
+    hsize_t      sel_dims[READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
+    hsize_t      start[READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
+    hsize_t      stride[READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
+    hsize_t      count[READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
+    hsize_t      block[READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS];
     hsize_t      flat_dims[1];
     size_t       i, read_buf_size, correct_buf_size;
-    hid_t        file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t        filespace = -1, memspace = -1;
+    hid_t        file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t        group_id  = H5I_INVALID_HID;
+    hid_t        filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
-        HDputs("Testing read from shared filtered chunks in 3D dataset");
-
-    CHECK_CUR_FILTER_AVAIL();
+        HDputs("Testing read from unshared filtered chunks on separate pages in 3D dataset");
 
-    dataset_dims[0] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_NROWS;
-    dataset_dims[1] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_NCOLS;
-    dataset_dims[2] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_DEPTH;
+    dataset_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NROWS;
+    dataset_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NCOLS;
+    dataset_dims[2] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DEPTH;
 
     /* Setup the buffer for writing and for comparison */
     correct_buf_size = dataset_dims[0] * dataset_dims[1] * dataset_dims[2] * sizeof(*correct_buf);
@@ -4889,20 +5161,7 @@ test_read_3d_filtered_dataset_overlap(void)
     VRFY((NULL != correct_buf), "HDcalloc succeeded");
 
     for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
-        /* Add the Column Index */
-        correct_buf[i] = (C_DATATYPE)(
-            (i % (hsize_t)(READ_SHARED_FILTERED_CHUNKS_3D_DEPTH * READ_SHARED_FILTERED_CHUNKS_3D_NCOLS))
-
-            /* Add the Row Index */
-            + ((i % (hsize_t)(mpi_size * READ_SHARED_FILTERED_CHUNKS_3D_DEPTH *
-                              READ_SHARED_FILTERED_CHUNKS_3D_NCOLS)) /
-               (hsize_t)(READ_SHARED_FILTERED_CHUNKS_3D_DEPTH * READ_SHARED_FILTERED_CHUNKS_3D_NCOLS))
-
-            /* Add the amount that gets added when a rank moves down to its next
-               section vertically in the dataset */
-            + ((hsize_t)(READ_SHARED_FILTERED_CHUNKS_3D_DEPTH * READ_SHARED_FILTERED_CHUNKS_3D_NCOLS) *
-               (i / (hsize_t)(mpi_size * READ_SHARED_FILTERED_CHUNKS_3D_DEPTH *
-                              READ_SHARED_FILTERED_CHUNKS_3D_NCOLS))));
+        correct_buf[i] = (C_DATATYPE)((i % (hsize_t)mpi_size) + (i / (hsize_t)mpi_size));
 
     if (MAINPROCESS) {
         plist_id = H5Pcreate(H5P_FILE_ACCESS);
@@ -4916,58 +5175,62 @@ test_read_3d_filtered_dataset_overlap(void)
 
         VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
 
+        group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+        VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
         /* Create the dataspace for the dataset */
-        filespace = H5Screate_simple(READ_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS, dataset_dims, NULL);
+        filespace =
+            H5Screate_simple(READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS, dataset_dims, NULL);
         VRFY((filespace >= 0), "File dataspace creation succeeded");
 
         /* Create chunked dataset */
-        chunk_dims[0] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_CH_NROWS;
-        chunk_dims[1] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_CH_NCOLS;
+        chunk_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NROWS;
+        chunk_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NCOLS;
         chunk_dims[2] = 1;
 
-        plist_id = H5Pcreate(H5P_DATASET_CREATE);
-        VRFY((plist_id >= 0), "DCPL creation succeeded");
+        plist_id = H5Pcopy(dcpl_id);
+        VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-        VRFY((H5Pset_chunk(plist_id, READ_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS, chunk_dims) >= 0),
-             "Chunk size set");
+        VRFY(
+            (H5Pset_chunk(plist_id, READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_DIMS, chunk_dims) >= 0),
+            "Chunk size set");
 
         /* Add test filter to the pipeline */
-        VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+        VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-        dset_id = H5Dcreate2(file_id, READ_SHARED_FILTERED_CHUNKS_3D_DATASET_NAME, HDF5_DATATYPE_NAME,
-                             filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+        dset_id = H5Dcreate2(group_id, READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_NAME,
+                             HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
         VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
         VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
         VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0),
              "Dataset write succeeded");
 
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
+        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
         VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+        VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
         VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
     }
 
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
-
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" READ_SHARED_FILTERED_CHUNKS_3D_DATASET_NAME, H5P_DEFAULT);
+    dset_id = H5Dopen2(group_id, READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_DATASET_NAME, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    sel_dims[0] = (hsize_t)(READ_SHARED_FILTERED_CHUNKS_3D_NROWS / mpi_size);
-    sel_dims[1] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_NCOLS;
-    sel_dims[2] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_DEPTH;
+    sel_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NROWS;
+    sel_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NCOLS;
+    sel_dims[2] = 1;
 
     /* Setup one-dimensional memory dataspace for reading the dataset data into a contiguous buffer */
     flat_dims[0] = sel_dims[0] * sel_dims[1] * sel_dims[2];
@@ -4983,18 +5246,20 @@ test_read_3d_filtered_dataset_overlap(void)
      * Each process defines the dataset selection in the file and
      * reads it to the selection in memory
      */
-    count[0]  = (hsize_t)(READ_SHARED_FILTERED_CHUNKS_3D_NROWS / READ_SHARED_FILTERED_CHUNKS_3D_CH_NROWS);
-    count[1]  = (hsize_t)(READ_SHARED_FILTERED_CHUNKS_3D_NCOLS / READ_SHARED_FILTERED_CHUNKS_3D_CH_NCOLS);
-    count[2]  = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_DEPTH;
-    stride[0] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_CH_NROWS;
-    stride[1] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_CH_NCOLS;
+    count[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NROWS /
+               (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NROWS;
+    count[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_NCOLS /
+               (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NCOLS;
+    count[2]  = 1;
+    stride[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NROWS;
+    stride[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NCOLS;
     stride[2] = 1;
-    block[0]  = 1;
-    block[1]  = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_CH_NCOLS;
+    block[0]  = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NROWS;
+    block[1]  = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SEP_PAGE_CH_NCOLS;
     block[2]  = 1;
-    start[0]  = (hsize_t)mpi_rank;
+    start[0]  = 0;
     start[1]  = 0;
-    start[2]  = 0;
+    start[2]  = (hsize_t)mpi_rank;
 
     if (VERBOSE_MED) {
         HDprintf("Process %d is reading with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
@@ -5007,51 +5272,36 @@ test_read_3d_filtered_dataset_overlap(void)
     VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
          "Hyperslab selection succeeded");
 
-    /* Create property list for collective dataset read */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
-
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
-
     read_buf_size = flat_dims[0] * sizeof(*read_buf);
 
     read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size);
     VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0),
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, read_buf) >= 0),
          "Dataset read succeeded");
 
     global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != global_buf), "HDcalloc succeeded");
 
-    {
-        size_t run_length =
-            (size_t)(READ_SHARED_FILTERED_CHUNKS_3D_NCOLS * READ_SHARED_FILTERED_CHUNKS_3D_DEPTH);
-        size_t num_blocks = (size_t)(READ_SHARED_FILTERED_CHUNKS_3D_NROWS / mpi_size);
+    /*
+     * Due to the nature of 3-dimensional reading, create an MPI vector type that allows each
+     * rank to write to the nth position of the global data buffer, where n is the rank number.
+     */
+    VRFY((MPI_SUCCESS == MPI_Type_vector((int)flat_dims[0], 1, mpi_size, C_DATATYPE_MPI, &vector_type)),
+         "MPI_Type_vector succeeded");
+    VRFY((MPI_SUCCESS == MPI_Type_commit(&vector_type)), "MPI_Type_commit succeeded");
 
-        /*
-         * Due to the nature of 3-dimensional reading, create an MPI vector type that allows each
-         * rank to write to the nth position of the global data buffer, where n is the rank number.
-         */
-        VRFY(
-            (MPI_SUCCESS == MPI_Type_vector((int)num_blocks, (int)run_length,
-                                            (int)(mpi_size * (int)run_length), C_DATATYPE_MPI, &vector_type)),
-            "MPI_Type_vector succeeded");
-        VRFY((MPI_SUCCESS == MPI_Type_commit(&vector_type)), "MPI_Type_commit succeeded");
-
-        /*
-         * Resize the type to allow interleaving,
-         * so make it "run_length" MPI_LONGs wide
-         */
-        VRFY((MPI_SUCCESS == MPI_Type_create_resized(vector_type, 0, (MPI_Aint)(run_length * sizeof(long)),
-                                                     &resized_vector_type)),
-             "MPI_Type_create_resized");
-        VRFY((MPI_SUCCESS == MPI_Type_commit(&resized_vector_type)), "MPI_Type_commit succeeded");
-    }
+    /*
+     * Resize the type to allow interleaving: its extent is one read_buf element (C_DATATYPE)
+     */
+    VRFY((MPI_SUCCESS == MPI_Type_create_resized(vector_type, 0, (MPI_Aint)sizeof(*read_buf),
+                                                 &resized_vector_type)),
+         "MPI_Type_create_resized");
+    VRFY((MPI_SUCCESS == MPI_Type_commit(&resized_vector_type)), "MPI_Type_commit succeeded");
 
     VRFY((MPI_SUCCESS == MPI_Allgather(read_buf, (int)flat_dims[0], C_DATATYPE_MPI, global_buf, 1,
                                        resized_vector_type, comm)),
-         "MPI_Allgatherv succeeded");
+         "MPI_Allgather succeeded");
 
     VRFY((0 == HDmemcmp(global_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
 
@@ -5068,78 +5318,1890 @@ test_read_3d_filtered_dataset_overlap(void)
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
 
 /*
- * Tests parallel read of filtered data to unshared
- * chunks using a compound datatype which doesn't
- * require a datatype conversion.
+ * Tests parallel read of transformed and filtered data in the
+ * case where only one process is reading from a particular
+ * chunk in the operation. Normally, a data transform function
+ * will cause the parallel library to break to independent I/O
+ * and this isn't allowed when there are filters in the pipeline.
+ * However, in this case the parallel library recognizes that
+ * the used data transform function "x" is the same as not
+ * applying the transform function. Therefore it does not apply
+ * the transform function resulting in not breaking to
+ * independent I/O.
  *
  * The MAINPROCESS rank will first write out all of the
  * data to the dataset. Then, each rank reads a part of
- * the dataset and contributes its piece to a global
- * buffer that is checked for consistency.
+ * the dataset and contributes its piece to a global buffer
+ * that is checked for consistency.
  *
- * Programmer: Jordan Henderson
- *             05/17/2018
+ * Programmer: Jan-Willem Blokland
+ *             08/20/2021
  */
 static void
-test_read_cmpd_filtered_dataset_no_conversion_unshared(void)
+test_read_transformed_filtered_dataset_no_overlap(const char *parent_group, H5Z_filter_t filter_id,
+                                                  hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id)
 {
-    COMPOUND_C_DATATYPE *read_buf    = NULL;
-    COMPOUND_C_DATATYPE *correct_buf = NULL;
-    COMPOUND_C_DATATYPE *global_buf  = NULL;
-    hsize_t              dataset_dims[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              chunk_dims[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              sel_dims[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              start[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              stride[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              count[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              block[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              flat_dims[1];
-    size_t               i, read_buf_size, correct_buf_size;
-    hid_t                file_id = -1, dset_id = -1, plist_id = -1, memtype = -1;
-    hid_t                filespace = -1, memspace = -1;
-    int *                recvcounts = NULL;
-    int *                displs     = NULL;
+    C_DATATYPE *read_buf    = NULL;
+    C_DATATYPE *correct_buf = NULL;
+    C_DATATYPE *global_buf  = NULL;
+    hsize_t     dataset_dims[READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     chunk_dims[READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     sel_dims[READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     start[READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     stride[READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     count[READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     block[READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS];
+    hsize_t     flat_dims[1];
+    size_t      i, read_buf_size, correct_buf_size;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
+    int *       recvcounts = NULL;
+    int *       displs     = NULL;
 
     if (MAINPROCESS)
-        HDputs("Testing read from unshared filtered chunks in Compound Datatype dataset without Datatype "
-               "conversion");
-
-    CHECK_CUR_FILTER_AVAIL();
+        HDputs("Testing read from unshared transformed and filtered chunks");
 
-    dataset_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_NROWS;
-    dataset_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_NCOLS;
+    dataset_dims[0] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NROWS;
+    dataset_dims[1] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NCOLS;
 
     /* Setup the buffer for writing and for comparison */
-    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
+    correct_buf_size = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NROWS *
+                       (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NCOLS * sizeof(*correct_buf);
 
-    correct_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
     VRFY((NULL != correct_buf), "HDcalloc succeeded");
 
-    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++) {
-        correct_buf[i].field1 = (short)((i % dataset_dims[1]) + (i / dataset_dims[1]));
+    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
+        correct_buf[i] = (C_DATATYPE)((i % (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])) +
+                                      (i / (dataset_dims[0] / (hsize_t)mpi_size * dataset_dims[1])));
+
+    if (MAINPROCESS) {
+        plist_id = H5Pcreate(H5P_FILE_ACCESS);
+        VRFY((plist_id >= 0), "FAPL creation succeeded");
+
+        VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
+             "Set libver bounds succeeded");
+
+        file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+        VRFY((file_id >= 0), "Test file open succeeded");
+
+        VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+
+        group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+        VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
+        /* Create the dataspace for the dataset */
+        filespace =
+            H5Screate_simple(READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
+        VRFY((filespace >= 0), "File dataspace creation succeeded");
+
+        /* Create chunked dataset */
+        chunk_dims[0] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS;
+        chunk_dims[1] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NCOLS;
+
+        plist_id = H5Pcopy(dcpl_id);
+        VRFY((plist_id >= 0), "DCPL copy succeeded");
+
+        VRFY(
+            (H5Pset_chunk(plist_id, READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
+            "Chunk size set");
+
+        /* Add test filter to the pipeline */
+        VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
+
+        dset_id = H5Dcreate2(group_id, READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_NAME,
+                             HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+        VRFY((dset_id >= 0), "Dataset creation succeeded");
+
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
+        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+
+        /* Create property list for the dataset write */
+        plist_id = H5Pcreate(H5P_DATASET_XFER);
+        VRFY((plist_id >= 0), "DXPL creation succeeded");
+
+        /* Set data transform expression */
+        VRFY((H5Pset_data_transform(plist_id, "x") >= 0), "Set data transform expression succeeded");
+
+        VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, correct_buf) >= 0),
+             "Dataset write succeeded");
+
+        VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+
+        /* Verify space allocation status */
+        plist_id = H5Dget_create_plist(dset_id);
+        VRFY((plist_id >= 0), "H5Dget_create_plist succeeded");
+        verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
+        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+        VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+        VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
+        VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+    }
+
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id);
+    VRFY((file_id >= 0), "Test file open succeeded");
+
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
+    dset_id = H5Dopen2(group_id, READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_DATASET_NAME, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
+
+    sel_dims[0] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS;
+    sel_dims[1] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NCOLS;
+
+    /* Setup one-dimensional memory dataspace for reading the dataset data into a contiguous buffer */
+    flat_dims[0] = sel_dims[0] * sel_dims[1];
+
+    memspace = H5Screate_simple(1, flat_dims, NULL);
+    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
+
+    /* Select hyperslab in the file */
+    filespace = H5Dget_space(dset_id);
+    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+
+    /*
+     * Each process defines the dataset selection in the file and reads
+     * it to the selection in memory
+     */
+    count[0] = 1;
+    count[1] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_NCOLS /
+               (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NCOLS;
+    stride[0] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS;
+    stride[1] = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NCOLS;
+    block[0]  = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS;
+    block[1]  = (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NCOLS;
+    start[0]  = ((hsize_t)mpi_rank * (hsize_t)READ_UNSHARED_TRANSFORMED_FILTERED_CHUNKS_CH_NROWS * count[0]);
+    start[1]  = 0;
+
+    if (VERBOSE_MED) {
+        HDprintf("Process %d is reading with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ]\n",
+                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
+        HDfflush(stdout);
+    }
+
+    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+         "Hyperslab selection succeeded");
+
+    /* Create property list for data transform */
+    plist_id = H5Pcopy(dxpl_id);
+    VRFY((plist_id >= 0), "DXPL copy succeeded");
+
+    /* Set data transform expression */
+    VRFY((H5Pset_data_transform(plist_id, "x") >= 0), "Set data transform expression succeeded");
+
+    read_buf_size = flat_dims[0] * sizeof(*read_buf);
+
+    read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size);
+    VRFY((NULL != read_buf), "HDcalloc succeeded");
+
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, read_buf) >= 0),
+         "Dataset read succeeded");
+
+    global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != global_buf), "HDcalloc succeeded");
+
+    /* Collect each piece of data from all ranks into a global buffer on all ranks */
+    recvcounts = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
+    VRFY((NULL != recvcounts), "HDcalloc succeeded");
+
+    for (i = 0; i < (size_t)mpi_size; i++)
+        recvcounts[i] = (int)flat_dims[0];
+
+    displs = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
+    VRFY((NULL != displs), "HDcalloc succeeded");
+
+    for (i = 0; i < (size_t)mpi_size; i++)
+        displs[i] = (int)(i * flat_dims[0]);
+
+    VRFY((MPI_SUCCESS == MPI_Allgatherv(read_buf, (int)flat_dims[0], C_DATATYPE_MPI, global_buf, recvcounts,
+                                        displs, C_DATATYPE_MPI, comm)),
+         "MPI_Allgatherv succeeded");
+
+    VRFY((0 == HDmemcmp(global_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+
+    if (displs)
+        HDfree(displs);
+    if (recvcounts)
+        HDfree(recvcounts);
+    if (global_buf)
+        HDfree(global_buf);
+    if (read_buf)
+        HDfree(read_buf);
+    if (correct_buf)
+        HDfree(correct_buf);
+
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+    VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
+    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
+    VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+
+    return;
+}
+
+/*
+ * Tests parallel read of filtered data in the case where
+ * the dataset has 3 dimensions and each process reads from
+ * each "page" in the 3rd dimension. However, no chunk on a
+ * given "page" is read from by more than one process.
+ *
+ * The MAINPROCESS rank will first write out all of the
+ * data to the dataset. Then, each rank reads a part of
+ * each "page" of the dataset and contributes its piece to a
+ * global buffer that is checked for consistency.
+ *
+ * Programmer: Jordan Henderson
+ *             05/16/2018
+ */
+static void
+test_read_3d_filtered_dataset_no_overlap_same_pages(const char *parent_group, H5Z_filter_t filter_id,
+                                                    hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id)
+{
+    C_DATATYPE *read_buf    = NULL;
+    C_DATATYPE *correct_buf = NULL;
+    C_DATATYPE *global_buf  = NULL;
+    hsize_t     dataset_dims[READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
+    hsize_t     chunk_dims[READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
+    hsize_t     sel_dims[READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
+    hsize_t     start[READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
+    hsize_t     stride[READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
+    hsize_t     count[READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
+    hsize_t     block[READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS];
+    hsize_t     flat_dims[1];
+    size_t      i, read_buf_size, correct_buf_size;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
+    int *       recvcounts = NULL;
+    int *       displs     = NULL;
+
+    if (MAINPROCESS)
+        HDputs("Testing read from unshared filtered chunks on the same pages in 3D dataset");
+
+    dataset_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_NROWS;
+    dataset_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_NCOLS;
+    dataset_dims[2] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DEPTH;
+
+    /* Set up the buffer used both for writing the dataset and for the final comparison */
+    correct_buf_size = dataset_dims[0] * dataset_dims[1] * dataset_dims[2] * sizeof(*correct_buf);
+
+    correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != correct_buf), "HDcalloc succeeded");
+
+    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
+        correct_buf[i] = (C_DATATYPE)((i % (dataset_dims[0] * dataset_dims[1])) +
+                                      (i / (dataset_dims[0] * dataset_dims[1])));
+
+    if (MAINPROCESS) {
+        plist_id = H5Pcreate(H5P_FILE_ACCESS);
+        VRFY((plist_id >= 0), "FAPL creation succeeded");
+
+        VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
+             "Set libver bounds succeeded");
+
+        file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+        VRFY((file_id >= 0), "Test file open succeeded");
+
+        VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+
+        group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+        VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
+        /* Create the dataspace for the dataset */
+        filespace =
+            H5Screate_simple(READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS, dataset_dims, NULL);
+        VRFY((filespace >= 0), "File dataspace creation succeeded");
+
+        /* Create chunked dataset */
+        chunk_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NROWS;
+        chunk_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NCOLS;
+        chunk_dims[2] = 1;
+
+        plist_id = H5Pcopy(dcpl_id);
+        VRFY((plist_id >= 0), "DCPL copy succeeded");
+
+        VRFY((H5Pset_chunk(plist_id, READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_DIMS, chunk_dims) >=
+              0),
+             "Chunk size set");
+
+        /* Add test filter to the pipeline */
+        VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
+
+        dset_id = H5Dcreate2(group_id, READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_NAME,
+                             HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+        VRFY((dset_id >= 0), "Dataset creation succeeded");
+
+        /* Verify space allocation status of the newly created dataset */
+        verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
+        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+
+        VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0),
+             "Dataset write succeeded");
+
+        /* Verify space allocation status again after the whole dataset has been written */
+        verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
+        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+        VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+        VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
+        VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+    }
+
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id);
+    VRFY((file_id >= 0), "Test file open succeeded");
+
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
+    dset_id = H5Dopen2(group_id, READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DATASET_NAME, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
+
+    sel_dims[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NROWS;
+    sel_dims[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_NCOLS;
+    sel_dims[2] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_DEPTH;
+
+    /* Set up a one-dimensional memory dataspace so the selection is read into a contiguous buffer */
+    flat_dims[0] = sel_dims[0] * sel_dims[1] * sel_dims[2];
+
+    memspace = H5Screate_simple(1, flat_dims, NULL);
+    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
+
+    /* Retrieve the file dataspace on which the hyperslab selection is made */
+    filespace = H5Dget_space(dset_id);
+    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+
+    /*
+     * Each process defines the dataset selection in the file and
+     * reads it to the selection in memory
+     */
+    count[0] = 1;
+    count[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_NCOLS /
+               (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NCOLS;
+    count[2]  = (hsize_t)mpi_size;
+    stride[0] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NROWS;
+    stride[1] = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NCOLS;
+    stride[2] = 1;
+    block[0]  = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NROWS;
+    block[1]  = (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NCOLS;
+    block[2]  = 1;
+    start[0]  = ((hsize_t)mpi_rank * (hsize_t)READ_UNSHARED_FILTERED_CHUNKS_3D_SAME_PAGE_CH_NROWS * count[0]);
+    start[1]  = 0;
+    start[2]  = 0;
+
+    if (VERBOSE_MED) {
+        HDprintf("Process %d is reading with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ]\n",
+                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
+        HDfflush(stdout);
+    }
+
+    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+         "Hyperslab selection succeeded");
+
+    read_buf_size = flat_dims[0] * sizeof(*read_buf);
+
+    read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size);
+    VRFY((NULL != read_buf), "HDcalloc succeeded");
+
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, read_buf) >= 0),
+         "Dataset read succeeded");
+
+    global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != global_buf), "HDcalloc succeeded");
+
+    /* Collect each piece of data from all ranks into a global buffer on all ranks */
+    recvcounts = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
+    VRFY((NULL != recvcounts), "HDcalloc succeeded");
+
+    for (i = 0; i < (size_t)mpi_size; i++)
+        recvcounts[i] = (int)flat_dims[0];
+
+    displs = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
+    VRFY((NULL != displs), "HDcalloc succeeded");
+
+    for (i = 0; i < (size_t)mpi_size; i++)
+        displs[i] = (int)(i * flat_dims[0]);
+
+    VRFY((MPI_SUCCESS == MPI_Allgatherv(read_buf, (int)flat_dims[0], C_DATATYPE_MPI, global_buf, recvcounts,
+                                        displs, C_DATATYPE_MPI, comm)),
+         "MPI_Allgatherv succeeded");
+
+    VRFY((0 == HDmemcmp(global_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+
+    if (displs)
+        HDfree(displs);
+    if (recvcounts)
+        HDfree(recvcounts);
+    if (global_buf)
+        HDfree(global_buf);
+    if (read_buf)
+        HDfree(read_buf);
+    if (correct_buf)
+        HDfree(correct_buf);
+
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+    VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
+    VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+
+    return;
+}
+
+/*
+ * Tests parallel read of filtered data in the case where
+ * the dataset has 3 dimensions and each process reads from
+ * each "page" in the 3rd dimension. Further, each chunk in
+ * each "page" is read from equally by all processes.
+ *
+ * The MAINPROCESS rank will first write out all of the
+ * data to the dataset. Then, each rank reads part of each
+ * chunk of each "page" and contributes its pieces to a
+ * global buffer that is checked for consistency.
+ *
+ * Programmer: Jordan Henderson
+ *             05/16/2018
+ */
+static void
+test_read_3d_filtered_dataset_overlap(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                      hid_t dcpl_id, hid_t dxpl_id)
+{
+    MPI_Datatype vector_type;
+    MPI_Datatype resized_vector_type;
+    C_DATATYPE * read_buf    = NULL;
+    C_DATATYPE * correct_buf = NULL;
+    C_DATATYPE * global_buf  = NULL;
+    hsize_t      dataset_dims[READ_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
+    hsize_t      chunk_dims[READ_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
+    hsize_t      sel_dims[READ_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
+    hsize_t      start[READ_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
+    hsize_t      stride[READ_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
+    hsize_t      count[READ_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
+    hsize_t      block[READ_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS];
+    hsize_t      flat_dims[1];
+    size_t       i, read_buf_size, correct_buf_size;
+    hid_t        file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t        group_id  = H5I_INVALID_HID;
+    hid_t        filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
+
+    if (MAINPROCESS)
+        HDputs("Testing read from shared filtered chunks in 3D dataset");
+
+    dataset_dims[0] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_NROWS;
+    dataset_dims[1] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_NCOLS;
+    dataset_dims[2] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_DEPTH;
+
+    /* Setup the buffer for writing and for comparison */
+    correct_buf_size = dataset_dims[0] * dataset_dims[1] * dataset_dims[2] * sizeof(*correct_buf);
+
+    correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != correct_buf), "HDcalloc succeeded");
+
+    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
+        /* Add the Column Index */
+        correct_buf[i] = (C_DATATYPE)(
+            (i % (hsize_t)(READ_SHARED_FILTERED_CHUNKS_3D_DEPTH * READ_SHARED_FILTERED_CHUNKS_3D_NCOLS))
+
+            /* Add the Row Index */
+            + ((i % (hsize_t)(mpi_size * READ_SHARED_FILTERED_CHUNKS_3D_DEPTH *
+                              READ_SHARED_FILTERED_CHUNKS_3D_NCOLS)) /
+               (hsize_t)(READ_SHARED_FILTERED_CHUNKS_3D_DEPTH * READ_SHARED_FILTERED_CHUNKS_3D_NCOLS))
+
+            /* Add the amount that gets added when a rank moves down to its next
+               section vertically in the dataset */
+            + ((hsize_t)(READ_SHARED_FILTERED_CHUNKS_3D_DEPTH * READ_SHARED_FILTERED_CHUNKS_3D_NCOLS) *
+               (i / (hsize_t)(mpi_size * READ_SHARED_FILTERED_CHUNKS_3D_DEPTH *
+                              READ_SHARED_FILTERED_CHUNKS_3D_NCOLS))));
+
+    if (MAINPROCESS) {
+        plist_id = H5Pcreate(H5P_FILE_ACCESS);
+        VRFY((plist_id >= 0), "FAPL creation succeeded");
+
+        VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
+             "Set libver bounds succeeded");
+
+        file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+        VRFY((file_id >= 0), "Test file open succeeded");
+
+        VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+
+        group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+        VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
+        /* Create the dataspace for the dataset */
+        filespace = H5Screate_simple(READ_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS, dataset_dims, NULL);
+        VRFY((filespace >= 0), "File dataspace creation succeeded");
+
+        /* Create chunked dataset */
+        chunk_dims[0] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_CH_NROWS;
+        chunk_dims[1] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_CH_NCOLS;
+        chunk_dims[2] = 1;
+
+        plist_id = H5Pcopy(dcpl_id);
+        VRFY((plist_id >= 0), "DCPL copy succeeded");
+
+        VRFY((H5Pset_chunk(plist_id, READ_SHARED_FILTERED_CHUNKS_3D_DATASET_DIMS, chunk_dims) >= 0),
+             "Chunk size set");
+
+        /* Add test filter to the pipeline */
+        VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
+
+        dset_id = H5Dcreate2(group_id, READ_SHARED_FILTERED_CHUNKS_3D_DATASET_NAME, HDF5_DATATYPE_NAME,
+                             filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+        VRFY((dset_id >= 0), "Dataset creation succeeded");
+
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
+        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+
+        VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0),
+             "Dataset write succeeded");
+
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
+        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+        VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+        VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
+        VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+    }
+
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id);
+    VRFY((file_id >= 0), "Test file open succeeded");
+
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
+    dset_id = H5Dopen2(group_id, READ_SHARED_FILTERED_CHUNKS_3D_DATASET_NAME, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
+
+    sel_dims[0] = (hsize_t)(READ_SHARED_FILTERED_CHUNKS_3D_NROWS / mpi_size);
+    sel_dims[1] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_NCOLS;
+    sel_dims[2] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_DEPTH;
+
+    /* Setup one-dimensional memory dataspace for reading the dataset data into a contiguous buffer */
+    flat_dims[0] = sel_dims[0] * sel_dims[1] * sel_dims[2];
+
+    memspace = H5Screate_simple(1, flat_dims, NULL);
+    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
+
+    /* Select hyperslab in the file */
+    filespace = H5Dget_space(dset_id);
+    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+
+    /*
+     * Each process defines the dataset selection in the file and
+     * reads it to the selection in memory
+     */
+    count[0]  = (hsize_t)(READ_SHARED_FILTERED_CHUNKS_3D_NROWS / READ_SHARED_FILTERED_CHUNKS_3D_CH_NROWS);
+    count[1]  = (hsize_t)(READ_SHARED_FILTERED_CHUNKS_3D_NCOLS / READ_SHARED_FILTERED_CHUNKS_3D_CH_NCOLS);
+    count[2]  = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_DEPTH;
+    stride[0] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_CH_NROWS;
+    stride[1] = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_CH_NCOLS;
+    stride[2] = 1;
+    block[0]  = 1;
+    block[1]  = (hsize_t)READ_SHARED_FILTERED_CHUNKS_3D_CH_NCOLS;
+    block[2]  = 1;
+    start[0]  = (hsize_t)mpi_rank;
+    start[1]  = 0;
+    start[2]  = 0;
+
+    if (VERBOSE_MED) {
+        HDprintf("Process %d is reading with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ]\n",
+                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
+        HDfflush(stdout);
+    }
+
+    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+         "Hyperslab selection succeeded");
+
+    read_buf_size = flat_dims[0] * sizeof(*read_buf);
+
+    read_buf = (C_DATATYPE *)HDcalloc(1, read_buf_size);
+    VRFY((NULL != read_buf), "HDcalloc succeeded");
+
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, read_buf) >= 0),
+         "Dataset read succeeded");
+
+    global_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != global_buf), "HDcalloc succeeded");
+
+    {
+        size_t run_length =
+            (size_t)(READ_SHARED_FILTERED_CHUNKS_3D_NCOLS * READ_SHARED_FILTERED_CHUNKS_3D_DEPTH);
+        size_t num_blocks = (size_t)(READ_SHARED_FILTERED_CHUNKS_3D_NROWS / mpi_size);
+
+        /*
+         * Due to the nature of 3-dimensional reading, create an MPI vector type that allows each
+         * rank to write to the nth position of the global data buffer, where n is the rank number.
+         */
+        VRFY(
+            (MPI_SUCCESS == MPI_Type_vector((int)num_blocks, (int)run_length,
+                                            (int)(mpi_size * (int)run_length), C_DATATYPE_MPI, &vector_type)),
+            "MPI_Type_vector succeeded");
+        VRFY((MPI_SUCCESS == MPI_Type_commit(&vector_type)), "MPI_Type_commit succeeded");
+
+        /*
+         * Resize the type to allow interleaving,
+         * so make it "run_length" MPI_LONGs wide
+         */
+        VRFY((MPI_SUCCESS == MPI_Type_create_resized(vector_type, 0, (MPI_Aint)(run_length * sizeof(long)),
+                                                     &resized_vector_type)),
+             "MPI_Type_create_resized");
+        VRFY((MPI_SUCCESS == MPI_Type_commit(&resized_vector_type)), "MPI_Type_commit succeeded");
+    }
+
+    VRFY((MPI_SUCCESS == MPI_Allgather(read_buf, (int)flat_dims[0], C_DATATYPE_MPI, global_buf, 1,
+                                       resized_vector_type, comm)),
+         "MPI_Allgather succeeded");
+
+    VRFY((0 == HDmemcmp(global_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+
+    VRFY((MPI_SUCCESS == MPI_Type_free(&vector_type)), "MPI_Type_free succeeded");
+    VRFY((MPI_SUCCESS == MPI_Type_free(&resized_vector_type)), "MPI_Type_free succeeded");
+
+    if (global_buf)
+        HDfree(global_buf);
+    if (read_buf)
+        HDfree(read_buf);
+    if (correct_buf)
+        HDfree(correct_buf);
+
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+    VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
+    VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+
+    return;
+}
+
+/*
+ * Tests parallel read of filtered data from unshared chunks using a compound datatype which doesn't
+ * require a datatype conversion.
+ *
+ * The MAINPROCESS rank will first write out all of the data to the dataset. Then, each rank reads a
+ * part of the dataset and contributes its piece to a global buffer that is checked for consistency.
+ *
+ * parent_group names the group opened in filenames[0]; filter_id is the filter under test (SZIP and
+ * ScaleOffset are skipped); fapl_id is used when all ranks reopen the file; dcpl_id is copied for
+ * dataset creation; dxpl_id is passed to H5Dread.
+ *
+ * Programmer: Jordan Henderson, 05/17/2018
+ */
+static void
+test_read_cmpd_filtered_dataset_no_conversion_unshared(const char *parent_group, H5Z_filter_t filter_id,
+                                                       hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id)
+{
+    COMPOUND_C_DATATYPE *read_buf    = NULL; /* elements read by this rank */
+    COMPOUND_C_DATATYPE *correct_buf = NULL; /* expected contents of the whole dataset */
+    COMPOUND_C_DATATYPE *global_buf  = NULL; /* data gathered from all ranks */
+    hsize_t              dataset_dims[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              chunk_dims[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              sel_dims[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              start[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              stride[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              count[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              block[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              flat_dims[1]; /* extent of the 1-D memory dataspace */
+    size_t               i, read_buf_size, correct_buf_size;
+    hid_t                file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID,
+          memtype   = H5I_INVALID_HID;
+    hid_t group_id  = H5I_INVALID_HID;
+    hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
+    int * recvcounts = NULL; /* number of bytes gathered from each rank */
+    int * displs     = NULL; /* byte offset of each rank's data in global_buf */
+
+    if (MAINPROCESS)
+        HDputs("Testing read from unshared filtered chunks in Compound Datatype dataset without Datatype "
+               "conversion");
+
+    /* SZIP and ScaleOffset filters don't support compound types, so skip before anything is allocated */
+    if (filter_id == H5Z_FILTER_SZIP || filter_id == H5Z_FILTER_SCALEOFFSET) {
+        if (MAINPROCESS)
+            SKIPPED();
+        return;
+    }
+
+    dataset_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_NROWS;
+    dataset_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_NCOLS;
+
+    /* Set up the buffer used both for writing the dataset and for the final comparison */
+    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
+
+    correct_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != correct_buf), "HDcalloc succeeded");
+    /* Every field of element i is (i % ncols) + (i / ncols), where ncols is dataset_dims[1] */
+    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++) {
+        correct_buf[i].field1 = (short)((i % dataset_dims[1]) + (i / dataset_dims[1]));
+
+        correct_buf[i].field2 = (int)((i % dataset_dims[1]) + (i / dataset_dims[1]));
+
+        correct_buf[i].field3 = (long)((i % dataset_dims[1]) + (i / dataset_dims[1]));
+    }
+
+    /* Create the compound type for memory; the dataset is created with this same type below */
+    memtype = H5Tcreate(H5T_COMPOUND, sizeof(COMPOUND_C_DATATYPE));
+    VRFY((memtype >= 0), "Datatype creation succeeded");
+
+    VRFY((H5Tinsert(memtype, "ShortData", HOFFSET(COMPOUND_C_DATATYPE, field1), H5T_NATIVE_SHORT) >= 0),
+         "Datatype insertion succeeded");
+    VRFY((H5Tinsert(memtype, "IntData", HOFFSET(COMPOUND_C_DATATYPE, field2), H5T_NATIVE_INT) >= 0),
+         "Datatype insertion succeeded");
+    VRFY((H5Tinsert(memtype, "LongData", HOFFSET(COMPOUND_C_DATATYPE, field3), H5T_NATIVE_LONG) >= 0),
+         "Datatype insertion succeeded");
+    /* Only the MAINPROCESS rank creates the dataset and writes correct_buf to it */
+    if (MAINPROCESS) {
+        plist_id = H5Pcreate(H5P_FILE_ACCESS); /* FAPL used only for the open below */
+        VRFY((plist_id >= 0), "FAPL creation succeeded");
+
+        VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
+             "Set libver bounds succeeded");
+
+        file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+        VRFY((file_id >= 0), "Test file open succeeded");
+
+        VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+
+        group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+        VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
+        /* Create the dataspace for the dataset */
+        filespace = H5Screate_simple(READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS,
+                                     dataset_dims, NULL);
+        VRFY((filespace >= 0), "File dataspace creation succeeded");
+
+        /* Create chunked dataset */
+        chunk_dims[0] = READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NROWS;
+        chunk_dims[1] = READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NCOLS;
+
+        plist_id = H5Pcopy(dcpl_id); /* modify a copy, not the caller's DCPL */
+        VRFY((plist_id >= 0), "DCPL copy succeeded");
+
+        VRFY((H5Pset_chunk(plist_id, READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS,
+                           chunk_dims) >= 0),
+             "Chunk size set");
+
+        /* Add test filter to the pipeline */
+        VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
+
+        dset_id = H5Dcreate2(group_id, READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_NAME,
+                             memtype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+        VRFY((dset_id >= 0), "Dataset creation succeeded");
+
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
+        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+        /* Write the whole dataset in one call (H5S_ALL for both the memory and file dataspace) */
+        VRFY((H5Dwrite(dset_id, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0),
+             "Dataset write succeeded");
+
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
+        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+        VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+        VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
+        VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+    }
+    /* Every rank now reopens the file read-only with the caller-supplied FAPL */
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id);
+    VRFY((file_id >= 0), "Test file open succeeded");
+
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
+    dset_id =
+        H5Dopen2(group_id, READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_NAME, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
+
+    sel_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NROWS;
+    sel_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_ENTRIES_PER_PROC;
+
+    /* Set up a one-dimensional memory dataspace for reading the dataset data into a contiguous buffer */
+    flat_dims[0] = sel_dims[0] * sel_dims[1];
+
+    memspace = H5Screate_simple(1, flat_dims, NULL);
+    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
+
+    /* Select hyperslab in the file */
+    filespace = H5Dget_space(dset_id);
+    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+
+    /*
+     * Each rank selects count[1] chunk-sized blocks (stride == block == chunk dims) that begin at
+     * row 0, column (mpi_rank * chunk columns), and reads them into the flat memory selection
+     */
+    count[0]  = 1;
+    count[1]  = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_ENTRIES_PER_PROC;
+    stride[0] = READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NROWS;
+    stride[1] = READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NCOLS;
+    block[0]  = READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NROWS;
+    block[1]  = READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NCOLS;
+    start[0]  = 0;
+    start[1]  = ((hsize_t)mpi_rank * READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NCOLS);
+
+    if (VERBOSE_MED) {
+        HDprintf("Process %d is reading with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ]\n",
+                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
+        HDfflush(stdout);
+    }
+
+    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+         "Hyperslab selection succeeded");
+
+    read_buf_size = flat_dims[0] * sizeof(*read_buf);
+
+    read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, read_buf_size);
+    VRFY((NULL != read_buf), "HDcalloc succeeded");
+
+    VRFY((H5Dread(dset_id, memtype, memspace, filespace, dxpl_id, read_buf) >= 0), "Dataset read succeeded");
+
+    global_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size); /* same size as correct_buf */
+    VRFY((NULL != global_buf), "HDcalloc succeeded");
+
+    /* Collect each piece of data from all ranks into a global buffer on all ranks */
+    recvcounts = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
+    VRFY((NULL != recvcounts), "HDcalloc succeeded");
+    /* Counts and displacements are expressed in bytes because the gather uses MPI_BYTE */
+    for (i = 0; i < (size_t)mpi_size; i++)
+        recvcounts[i] = (int)(flat_dims[0] * sizeof(*read_buf));
+
+    displs = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
+    VRFY((NULL != displs), "HDcalloc succeeded");
+
+    for (i = 0; i < (size_t)mpi_size; i++)
+        displs[i] = (int)(i * flat_dims[0] * sizeof(*read_buf));
+
+    VRFY((MPI_SUCCESS == MPI_Allgatherv(read_buf, (int)(flat_dims[0] * sizeof(COMPOUND_C_DATATYPE)), MPI_BYTE,
+                                        global_buf, recvcounts, displs, MPI_BYTE, comm)),
+         "MPI_Allgatherv succeeded");
+    /* The test passes only if the gathered data compares equal to correct_buf */
+    VRFY((0 == HDmemcmp(global_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+
+    if (displs)
+        HDfree(displs);
+    if (recvcounts)
+        HDfree(recvcounts);
+    if (global_buf)
+        HDfree(global_buf);
+    if (read_buf)
+        HDfree(read_buf);
+    if (correct_buf)
+        HDfree(correct_buf);
+    /* Close the handles opened for the read phase, plus the memory datatype */
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+    VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
+    VRFY((H5Tclose(memtype) >= 0), "Memory datatype close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
+    VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+
+    return;
+}
+
+/*
+ * Tests parallel read of filtered data from shared chunks using a compound datatype which doesn't
+ * require a datatype conversion.
+ *
+ * The MAINPROCESS rank will first write out all of the data to the dataset. Then, each rank reads a part
+ * of each chunk of the dataset and contributes its piece to a global buffer that is checked for consistency.
+ *
+ * parent_group names the group opened in filenames[0]; filter_id is the filter under test (SZIP and
+ * ScaleOffset are skipped); fapl_id is used when all ranks reopen the file; dcpl_id is copied for
+ * dataset creation; dxpl_id is passed to H5Dread.
+ *
+ * Programmer: Jordan Henderson, 05/17/2018
+ */
+static void
+test_read_cmpd_filtered_dataset_no_conversion_shared(const char *parent_group, H5Z_filter_t filter_id,
+                                                     hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id)
+{
+    COMPOUND_C_DATATYPE *read_buf    = NULL; /* elements read by this rank */
+    COMPOUND_C_DATATYPE *correct_buf = NULL; /* expected contents of the whole dataset */
+    COMPOUND_C_DATATYPE *global_buf  = NULL; /* data gathered from all ranks */
+    hsize_t              dataset_dims[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              chunk_dims[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              sel_dims[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              start[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              stride[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              count[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              block[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              flat_dims[1]; /* extent of the 1-D memory dataspace */
+    size_t               i, read_buf_size, correct_buf_size;
+    hid_t                file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID,
+          memtype   = H5I_INVALID_HID;
+    hid_t group_id  = H5I_INVALID_HID;
+    hid_t filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
+    int * recvcounts = NULL; /* number of bytes gathered from each rank */
+    int * displs     = NULL; /* byte offset of each rank's data in global_buf */
+
+    if (MAINPROCESS)
+        HDputs("Testing read from shared filtered chunks in Compound Datatype dataset without Datatype "
+               "conversion");
+
+    /* SZIP and ScaleOffset filters don't support compound types, so skip before anything is allocated */
+    if (filter_id == H5Z_FILTER_SZIP || filter_id == H5Z_FILTER_SCALEOFFSET) {
+        if (MAINPROCESS)
+            SKIPPED();
+        return;
+    }
+
+    dataset_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_NROWS;
+    dataset_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_NCOLS;
+
+    /* Set up the buffer used both for writing the dataset and for the final comparison */
+    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
+
+    correct_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != correct_buf), "HDcalloc succeeded");
+    /* All three fields of element i are computed from the same expression; only the cast differs */
+    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++) {
+        correct_buf[i].field1 =
+            (short)((dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
+                    (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
+
+        correct_buf[i].field2 =
+            (int)((dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
+                  (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
+
+        correct_buf[i].field3 =
+            (long)((dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
+                   (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
+    }
+
+    /* Create the compound type for memory; the dataset is created with this same type below */
+    memtype = H5Tcreate(H5T_COMPOUND, sizeof(COMPOUND_C_DATATYPE));
+    VRFY((memtype >= 0), "Datatype creation succeeded");
+
+    VRFY((H5Tinsert(memtype, "ShortData", HOFFSET(COMPOUND_C_DATATYPE, field1), H5T_NATIVE_SHORT) >= 0),
+         "Datatype insertion succeeded");
+    VRFY((H5Tinsert(memtype, "IntData", HOFFSET(COMPOUND_C_DATATYPE, field2), H5T_NATIVE_INT) >= 0),
+         "Datatype insertion succeeded");
+    VRFY((H5Tinsert(memtype, "LongData", HOFFSET(COMPOUND_C_DATATYPE, field3), H5T_NATIVE_LONG) >= 0),
+         "Datatype insertion succeeded");
+    /* Only the MAINPROCESS rank creates the dataset and writes correct_buf to it */
+    if (MAINPROCESS) {
+        plist_id = H5Pcreate(H5P_FILE_ACCESS); /* FAPL used only for the open below */
+        VRFY((plist_id >= 0), "FAPL creation succeeded");
+
+        VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
+             "Set libver bounds succeeded");
+
+        file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+        VRFY((file_id >= 0), "Test file open succeeded");
+
+        VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+
+        group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+        VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
+        /* Create the dataspace for the dataset */
+        filespace = H5Screate_simple(READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS,
+                                     dataset_dims, NULL);
+        VRFY((filespace >= 0), "File dataspace creation succeeded");
+
+        /* Create chunked dataset */
+        chunk_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NROWS;
+        chunk_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NCOLS;
+
+        plist_id = H5Pcopy(dcpl_id); /* modify a copy, not the caller's DCPL */
+        VRFY((plist_id >= 0), "DCPL copy succeeded");
+
+        VRFY((H5Pset_chunk(plist_id, READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS,
+                           chunk_dims) >= 0),
+             "Chunk size set");
+
+        /* Add test filter to the pipeline */
+        VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
+
+        dset_id = H5Dcreate2(group_id, READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_NAME,
+                             memtype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+        VRFY((dset_id >= 0), "Dataset creation succeeded");
+
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
+        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+        /* Write the whole dataset in one call (H5S_ALL for both the memory and file dataspace) */
+        VRFY((H5Dwrite(dset_id, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0),
+             "Dataset write succeeded");
+
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
+        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+        VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+        VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
+        VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+    }
+    /* Every rank now reopens the file read-only with the caller-supplied FAPL */
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id);
+    VRFY((file_id >= 0), "Test file open succeeded");
+
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
+    dset_id =
+        H5Dopen2(group_id, READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_NAME, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
+
+    sel_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NROWS / (hsize_t)mpi_size;
+    sel_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_ENTRIES_PER_PROC;
+
+    /* Set up a one-dimensional memory dataspace for reading the dataset data into a contiguous buffer */
+    flat_dims[0] = sel_dims[0] * sel_dims[1];
+
+    memspace = H5Screate_simple(1, flat_dims, NULL);
+    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
+
+    /* Select hyperslab in the file */
+    filespace = H5Dget_space(dset_id);
+    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+
+    /*
+     * Each rank selects count[1] blocks of (chunk rows / mpi_size) x (chunk columns) elements that
+     * begin at row mpi_rank, column 0, with a stride of one chunk, and reads them into memory
+     */
+    count[0]  = 1;
+    count[1]  = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_ENTRIES_PER_PROC;
+    stride[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NROWS;
+    stride[1] = READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NCOLS;
+    block[0]  = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NROWS / (hsize_t)mpi_size;
+    block[1]  = READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NCOLS;
+    start[0]  = (hsize_t)mpi_rank;
+    start[1]  = 0;
+
+    if (VERBOSE_MED) {
+        HDprintf("Process %d is reading with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ]\n",
+                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
+        HDfflush(stdout);
+    }
+
+    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+         "Hyperslab selection succeeded");
+
+    read_buf_size = flat_dims[0] * sizeof(*read_buf);
+
+    read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, read_buf_size);
+    VRFY((NULL != read_buf), "HDcalloc succeeded");
+
+    VRFY((H5Dread(dset_id, memtype, memspace, filespace, dxpl_id, read_buf) >= 0), "Dataset read succeeded");
+
+    global_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size); /* same size as correct_buf */
+    VRFY((NULL != global_buf), "HDcalloc succeeded");
+
+    /* Collect each piece of data from all ranks into a global buffer on all ranks */
+    recvcounts = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
+    VRFY((NULL != recvcounts), "HDcalloc succeeded");
+    /* Counts and displacements are expressed in bytes because the gather uses MPI_BYTE */
+    for (i = 0; i < (size_t)mpi_size; i++)
+        recvcounts[i] = (int)(flat_dims[0] * sizeof(*read_buf));
+
+    displs = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
+    VRFY((NULL != displs), "HDcalloc succeeded");
+
+    for (i = 0; i < (size_t)mpi_size; i++)
+        displs[i] = (int)(i * flat_dims[0] * sizeof(*read_buf));
+
+    VRFY((MPI_SUCCESS == MPI_Allgatherv(read_buf, (int)(flat_dims[0] * sizeof(COMPOUND_C_DATATYPE)), MPI_BYTE,
+                                        global_buf, recvcounts, displs, MPI_BYTE, comm)),
+         "MPI_Allgatherv succeeded");
+    /* The test passes only if the gathered data compares equal to correct_buf */
+    VRFY((0 == HDmemcmp(global_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+
+    if (displs)
+        HDfree(displs);
+    if (recvcounts)
+        HDfree(recvcounts);
+    if (global_buf)
+        HDfree(global_buf);
+    if (read_buf)
+        HDfree(read_buf);
+    if (correct_buf)
+        HDfree(correct_buf);
+    /* Close the handles opened for the read phase, plus the memory datatype */
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+    VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
+    VRFY((H5Tclose(memtype) >= 0), "Memory datatype close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
+    VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+
+    return;
+}
+
+/*
+ * Tests parallel read of filtered data from unshared
+ * chunks using a compound datatype which requires a
+ * datatype conversion.
+ *
+ * The MAINPROCESS rank will first write out all of the
+ * data to the dataset. Then, each rank reads a part of
+ * the dataset and contributes its piece to a global
+ * buffer that is checked for consistency.
+ *
+ * Programmer: Jordan Henderson
+ *             05/17/2018
+ */
+static void
+test_read_cmpd_filtered_dataset_type_conversion_unshared(const char *parent_group, H5Z_filter_t filter_id,
+                                                         hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id)
+{
+    COMPOUND_C_DATATYPE *read_buf    = NULL;
+    COMPOUND_C_DATATYPE *correct_buf = NULL;
+    COMPOUND_C_DATATYPE *global_buf  = NULL;
+    hsize_t              dataset_dims[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              chunk_dims[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              sel_dims[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              start[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              stride[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              count[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              block[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
+    hsize_t              flat_dims[1];
+    size_t               i, read_buf_size, correct_buf_size;
+    hid_t                file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t                filetype = H5I_INVALID_HID, memtype = H5I_INVALID_HID;
+    hid_t                group_id  = H5I_INVALID_HID;
+    hid_t                filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
+    int *                recvcounts = NULL;
+    int *                displs     = NULL;
+
+    if (MAINPROCESS)
+        HDputs("Testing read from unshared filtered chunks in Compound Datatype dataset with Datatype "
+               "conversion");
+
+    /* SZIP and ScaleOffset filters don't support compound types */
+    if (filter_id == H5Z_FILTER_SZIP || filter_id == H5Z_FILTER_SCALEOFFSET) {
+        if (MAINPROCESS)
+            SKIPPED();
+        return;
+    }
+
+    dataset_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_NROWS;
+    dataset_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_NCOLS;
+
+    /* Setup the buffer for writing and for comparison */
+    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
+
+    correct_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != correct_buf), "HDcalloc succeeded");
+
+    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++) {
+        correct_buf[i].field1 = (short)((i % dataset_dims[1]) + (i / dataset_dims[1]));
+
+        correct_buf[i].field2 = (int)((i % dataset_dims[1]) + (i / dataset_dims[1]));
+
+        correct_buf[i].field3 = (long)((i % dataset_dims[1]) + (i / dataset_dims[1]));
+    }
+
+    /* Create the compound type for memory. */
+    memtype = H5Tcreate(H5T_COMPOUND, sizeof(COMPOUND_C_DATATYPE));
+    VRFY((memtype >= 0), "Datatype creation succeeded");
+
+    VRFY((H5Tinsert(memtype, "ShortData", HOFFSET(COMPOUND_C_DATATYPE, field1), H5T_NATIVE_SHORT) >= 0),
+         "Datatype insertion succeeded");
+    VRFY((H5Tinsert(memtype, "IntData", HOFFSET(COMPOUND_C_DATATYPE, field2), H5T_NATIVE_INT) >= 0),
+         "Datatype insertion succeeded");
+    VRFY((H5Tinsert(memtype, "LongData", HOFFSET(COMPOUND_C_DATATYPE, field3), H5T_NATIVE_LONG) >= 0),
+         "Datatype insertion succeeded");
+
+    /* Create the compound type for file. */
+    filetype = H5Tcreate(H5T_COMPOUND, 32);
+    VRFY((filetype >= 0), "Datatype creation succeeded");
+
+    VRFY((H5Tinsert(filetype, "ShortData", 0, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
+    VRFY((H5Tinsert(filetype, "IntData", 8, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
+    VRFY((H5Tinsert(filetype, "LongData", 16, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
+
+    if (MAINPROCESS) {
+        plist_id = H5Pcreate(H5P_FILE_ACCESS);
+        VRFY((plist_id >= 0), "FAPL creation succeeded");
+
+        VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
+             "Set libver bounds succeeded");
+
+        file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+        VRFY((file_id >= 0), "Test file open succeeded");
+
+        VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+
+        group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+        VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
+        /* Create the dataspace for the dataset */
+        filespace = H5Screate_simple(READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS,
+                                     dataset_dims, NULL);
+        VRFY((filespace >= 0), "File dataspace creation succeeded");
+
+        /* Create chunked dataset */
+        chunk_dims[0] = READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NROWS;
+        chunk_dims[1] = READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NCOLS;
+
+        plist_id = H5Pcopy(dcpl_id);
+        VRFY((plist_id >= 0), "DCPL copy succeeded");
+
+        VRFY((H5Pset_chunk(plist_id, READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS,
+                           chunk_dims) >= 0),
+             "Chunk size set");
+
+        /* Add test filter to the pipeline */
+        VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
+
+        dset_id = H5Dcreate2(group_id, READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_NAME,
+                             filetype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+        VRFY((dset_id >= 0), "Dataset creation succeeded");
+
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
+        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+
+        VRFY((H5Dwrite(dset_id, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0),
+             "Dataset write succeeded");
+
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
+        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+        VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+        VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
+        VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+    }
+
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id);
+    VRFY((file_id >= 0), "Test file open succeeded");
+
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
+    dset_id =
+        H5Dopen2(group_id, READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_NAME, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
+
+    sel_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NROWS;
+    sel_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_ENTRIES_PER_PROC;
+
+    /* Setup one-dimensional memory dataspace for reading the dataset data into a contiguous buffer */
+    flat_dims[0] = sel_dims[0] * sel_dims[1];
+
+    memspace = H5Screate_simple(1, flat_dims, NULL);
+    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
+
+    /* Select hyperslab in the file */
+    filespace = H5Dget_space(dset_id);
+    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+
+    /*
+     * Each process defines the dataset selection in the file and
+     * reads it to the selection in memory
+     */
+    count[0]  = 1;
+    count[1]  = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_ENTRIES_PER_PROC;
+    stride[0] = READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NROWS;
+    stride[1] = READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NCOLS;
+    block[0]  = READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NROWS;
+    block[1]  = READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NCOLS;
+    start[0]  = 0;
+    start[1]  = ((hsize_t)mpi_rank * READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NCOLS);
+
+    if (VERBOSE_MED) {
+        HDprintf("Process %d is reading with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ]\n",
+                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
+        HDfflush(stdout);
+    }
+
+    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+         "Hyperslab selection succeeded");
+
+    read_buf_size = flat_dims[0] * sizeof(*read_buf);
+
+    read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, read_buf_size);
+    VRFY((NULL != read_buf), "HDcalloc succeeded");
+
+    VRFY((H5Dread(dset_id, memtype, memspace, filespace, dxpl_id, read_buf) >= 0), "Dataset read succeeded");
+
+    global_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != global_buf), "HDcalloc succeeded");
+
+    /* Collect each piece of data from all ranks into a global buffer on all ranks */
+    recvcounts = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
+    VRFY((NULL != recvcounts), "HDcalloc succeeded");
+
+    for (i = 0; i < (size_t)mpi_size; i++)
+        recvcounts[i] = (int)(flat_dims[0] * sizeof(*read_buf));
+
+    displs = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
+    VRFY((NULL != displs), "HDcalloc succeeded");
+
+    for (i = 0; i < (size_t)mpi_size; i++)
+        displs[i] = (int)(i * flat_dims[0] * sizeof(*read_buf));
+
+    VRFY((MPI_SUCCESS == MPI_Allgatherv(read_buf, (int)(flat_dims[0] * sizeof(COMPOUND_C_DATATYPE)), MPI_BYTE,
+                                        global_buf, recvcounts, displs, MPI_BYTE, comm)),
+         "MPI_Allgatherv succeeded");
+
+    VRFY((0 == HDmemcmp(global_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+
+    if (displs)
+        HDfree(displs);
+    if (recvcounts)
+        HDfree(recvcounts);
+    if (global_buf)
+        HDfree(global_buf);
+    if (read_buf)
+        HDfree(read_buf);
+    if (correct_buf)
+        HDfree(correct_buf);
+
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+    VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
+    VRFY((H5Tclose(filetype) >= 0), "File datatype close succeeded");
+    VRFY((H5Tclose(memtype) >= 0), "Memory datatype close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
+    VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+
+    return;
+}
+
+/*
+ * Tests parallel read of filtered data from shared chunks using a compound datatype which
+ * requires a datatype conversion: the memory type has native short/int/long members while
+ * the file type stores each member as a 64-bit big-endian integer.
+ *
+ * The MAINPROCESS rank first creates the dataset under parent_group (chunked, with filter_id added
+ * to a copy of dcpl_id) and writes all of the data. Then every rank opens the file with fapl_id,
+ * reads a part of each chunk using dxpl_id and contributes its pieces to a global buffer that is
+ * checked for consistency. The test is skipped for the SZIP and ScaleOffset filters.
+ *
+ * Programmer: Jordan Henderson
+ *             05/17/2018
+ */
+static void
+test_read_cmpd_filtered_dataset_type_conversion_shared(const char *parent_group, H5Z_filter_t filter_id,
+                                                       hid_t fapl_id, hid_t dcpl_id, hid_t dxpl_id)
+{
+    COMPOUND_C_DATATYPE *read_buf    = NULL;
+    COMPOUND_C_DATATYPE *correct_buf = NULL;
+    COMPOUND_C_DATATYPE *global_buf  = NULL;
+    hsize_t              dataset_dims[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              chunk_dims[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              sel_dims[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              start[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              stride[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              count[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              block[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
+    hsize_t              flat_dims[1];
+    size_t               i, read_buf_size, correct_buf_size;
+    hid_t                file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t                filetype = H5I_INVALID_HID, memtype = H5I_INVALID_HID, group_id = H5I_INVALID_HID;
+    hid_t                filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
+    int *                recvcounts = NULL;
+    int *                displs     = NULL;
+
+    if (MAINPROCESS)
+        HDputs(
+            "Testing read from shared filtered chunks in Compound Datatype dataset with Datatype conversion");
+
+    /* SZIP and ScaleOffset filters don't support compound types */
+    if (filter_id == H5Z_FILTER_SZIP || filter_id == H5Z_FILTER_SCALEOFFSET) {
+        if (MAINPROCESS)
+            SKIPPED();
+        return;
+    }
+
+    dataset_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_NROWS;
+    dataset_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_NCOLS;
+
+    /* Setup the buffer for writing and for comparison */
+    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
+
+    correct_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != correct_buf), "HDcalloc succeeded");
+
+    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++) {
+        correct_buf[i].field1 =
+            (short)((dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
+                    (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
+
+        correct_buf[i].field2 =
+            (int)((dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
+                  (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
+
+        correct_buf[i].field3 =
+            (long)((dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
+                   (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
+    }
+
+    /* Create the compound type for memory. */
+    memtype = H5Tcreate(H5T_COMPOUND, sizeof(COMPOUND_C_DATATYPE));
+    VRFY((memtype >= 0), "Datatype creation succeeded");
+
+    VRFY((H5Tinsert(memtype, "ShortData", HOFFSET(COMPOUND_C_DATATYPE, field1), H5T_NATIVE_SHORT) >= 0),
+         "Datatype insertion succeeded");
+    VRFY((H5Tinsert(memtype, "IntData", HOFFSET(COMPOUND_C_DATATYPE, field2), H5T_NATIVE_INT) >= 0),
+         "Datatype insertion succeeded");
+    VRFY((H5Tinsert(memtype, "LongData", HOFFSET(COMPOUND_C_DATATYPE, field3), H5T_NATIVE_LONG) >= 0),
+         "Datatype insertion succeeded");
+
+    /* Create the compound type for file (64-bit BE members, so reads require a conversion). */
+    filetype = H5Tcreate(H5T_COMPOUND, 32);
+    VRFY((filetype >= 0), "Datatype creation succeeded");
+
+    VRFY((H5Tinsert(filetype, "ShortData", 0, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
+    VRFY((H5Tinsert(filetype, "IntData", 8, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
+    VRFY((H5Tinsert(filetype, "LongData", 16, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
+
+    if (MAINPROCESS) {
+        plist_id = H5Pcreate(H5P_FILE_ACCESS);
+        VRFY((plist_id >= 0), "FAPL creation succeeded");
+
+        VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
+             "Set libver bounds succeeded");
+
+        file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+        VRFY((file_id >= 0), "Test file open succeeded");
+
+        VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+
+        group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+        VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
+        /* Create the dataspace for the dataset */
+        filespace = H5Screate_simple(READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS,
+                                     dataset_dims, NULL);
+        VRFY((filespace >= 0), "File dataspace creation succeeded");
+
+        /* Create chunked dataset */
+        chunk_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NROWS;
+        chunk_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NCOLS;
+
+        plist_id = H5Pcopy(dcpl_id);
+        VRFY((plist_id >= 0), "DCPL copy succeeded");
+
+        VRFY((H5Pset_chunk(plist_id, READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS,
+                           chunk_dims) >= 0),
+             "Chunk size set");
+
+        /* Add test filter to the pipeline */
+        VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
+
+        dset_id = H5Dcreate2(group_id, READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_NAME,
+                             filetype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+        VRFY((dset_id >= 0), "Dataset creation succeeded");
+
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
+        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+
+        VRFY((H5Dwrite(dset_id, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0),
+             "Dataset write succeeded");
+
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
+        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+        VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+        VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
+        VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+    }
+
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, fapl_id);
+    VRFY((file_id >= 0), "Test file open succeeded");
+
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
+    dset_id =
+        H5Dopen2(group_id, READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_NAME, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
+
+    sel_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NROWS / (hsize_t)mpi_size;
+    sel_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_ENTRIES_PER_PROC;
+
+    /* Setup one-dimensional memory dataspace for reading the dataset data into a contiguous buffer */
+    flat_dims[0] = sel_dims[0] * sel_dims[1];
+
+    memspace = H5Screate_simple(1, flat_dims, NULL);
+    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
+
+    /* Select hyperslab in the file */
+    filespace = H5Dget_space(dset_id);
+    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+
+    /*
+     * Each process defines the dataset selection in the file and
+     * reads it to the selection in memory
+     */
+    count[0]  = 1;
+    count[1]  = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_ENTRIES_PER_PROC;
+    stride[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NROWS;
+    stride[1] = READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NCOLS;
+    block[0]  = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NROWS / (hsize_t)mpi_size;
+    block[1]  = READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NCOLS;
+    start[0]  = (hsize_t)mpi_rank;
+    start[1]  = 0;
+
+    if (VERBOSE_MED) {
+        HDprintf("Process %d is reading with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ]\n",
+                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
+        HDfflush(stdout);
+    }
+
+    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+         "Hyperslab selection succeeded");
+
+    read_buf_size = flat_dims[0] * sizeof(*read_buf);
+
+    read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, read_buf_size);
+    VRFY((NULL != read_buf), "HDcalloc succeeded");
+
+    VRFY((H5Dread(dset_id, memtype, memspace, filespace, dxpl_id, read_buf) >= 0), "Dataset read succeeded");
+
+    global_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != global_buf), "HDcalloc succeeded");
+
+    /* Collect each piece of data from all ranks into a global buffer on all ranks (counts are in bytes) */
+    recvcounts = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
+    VRFY((NULL != recvcounts), "HDcalloc succeeded");
+
+    for (i = 0; i < (size_t)mpi_size; i++)
+        recvcounts[i] = (int)(flat_dims[0] * sizeof(*read_buf));
+
+    displs = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
+    VRFY((NULL != displs), "HDcalloc succeeded");
+
+    for (i = 0; i < (size_t)mpi_size; i++)
+        displs[i] = (int)(i * flat_dims[0] * sizeof(*read_buf));
+
+    VRFY((MPI_SUCCESS == MPI_Allgatherv(read_buf, (int)(flat_dims[0] * sizeof(COMPOUND_C_DATATYPE)), MPI_BYTE,
+                                        global_buf, recvcounts, displs, MPI_BYTE, comm)),
+         "MPI_Allgatherv succeeded");
+
+    VRFY((0 == HDmemcmp(global_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+
+    if (displs)
+        HDfree(displs);
+    if (recvcounts)
+        HDfree(recvcounts);
+    if (global_buf)
+        HDfree(global_buf);
+    if (read_buf)
+        HDfree(read_buf);
+    if (correct_buf)
+        HDfree(correct_buf);
+
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+    VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
+    VRFY((H5Tclose(filetype) >= 0), "File datatype close succeeded");
+    VRFY((H5Tclose(memtype) >= 0), "Memory datatype close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
+    VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+
+    return;
+}
+
+/*
+ * Tests write of filtered data to a dataset by a single process. The MAINPROCESS rank creates
+ * the chunked dataset under parent_group (with filter_id added to a copy of dcpl_id) and writes
+ * it using H5P_DEFAULT as the transfer property list. After the write has succeeded, the dataset
+ * is closed and then re-opened in parallel (file opened with fapl_id) and read in full by all
+ * processes using dxpl_id to ensure data correctness.
+ *
+ * Programmer: Jordan Henderson
+ *             08/03/2017
+ */
+static void
+test_write_serial_read_parallel(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                hid_t dcpl_id, hid_t dxpl_id)
+{
+    C_DATATYPE *data        = NULL;
+    C_DATATYPE *read_buf    = NULL;
+    C_DATATYPE *correct_buf = NULL;
+    hsize_t     dataset_dims[WRITE_SERIAL_READ_PARALLEL_DATASET_DIMS];
+    hsize_t     chunk_dims[WRITE_SERIAL_READ_PARALLEL_DATASET_DIMS];
+    size_t      i, data_size, correct_buf_size;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID;
+
+    if (MAINPROCESS)
+        HDputs("Testing write file serially; read file in parallel");
+
+    dataset_dims[0] = (hsize_t)WRITE_SERIAL_READ_PARALLEL_NROWS;
+    dataset_dims[1] = (hsize_t)WRITE_SERIAL_READ_PARALLEL_NCOLS;
+    dataset_dims[2] = (hsize_t)WRITE_SERIAL_READ_PARALLEL_DEPTH;
+
+    /* Write the file on the MAINPROCESS rank */
+    if (MAINPROCESS) {
+        /* Set up file access property list */
+        plist_id = H5Pcreate(H5P_FILE_ACCESS);
+        VRFY((plist_id >= 0), "FAPL creation succeeded");
+
+        VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
+             "Set libver bounds succeeded");
+
+        file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+        VRFY((file_id >= 0), "Test file open succeeded");
+
+        VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+
+        group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+        VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
+        /* Set the chunk dimensions and create the dataspace for the dataset */
+        chunk_dims[0] = (hsize_t)WRITE_SERIAL_READ_PARALLEL_CH_NROWS;
+        chunk_dims[1] = (hsize_t)WRITE_SERIAL_READ_PARALLEL_CH_NCOLS;
+        chunk_dims[2] = 1; /* each chunk is a single element deep in the third dimension */
+
+        filespace = H5Screate_simple(WRITE_SERIAL_READ_PARALLEL_DATASET_DIMS, dataset_dims, NULL);
+        VRFY((filespace >= 0), "File dataspace creation succeeded");
+
+        /* Create chunked dataset */
+        plist_id = H5Pcopy(dcpl_id);
+        VRFY((plist_id >= 0), "DCPL copy succeeded");
+
+        VRFY((H5Pset_chunk(plist_id, WRITE_SERIAL_READ_PARALLEL_DATASET_DIMS, chunk_dims) >= 0),
+             "Chunk size set");
+
+        /* Add test filter to the pipeline */
+        VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
+
+        dset_id = H5Dcreate2(group_id, WRITE_SERIAL_READ_PARALLEL_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
+                             H5P_DEFAULT, plist_id, H5P_DEFAULT);
+        VRFY((dset_id >= 0), "Dataset creation succeeded");
+
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
+        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+
+        data_size = dataset_dims[0] * dataset_dims[1] * dataset_dims[2] * sizeof(*data);
+
+        data = (C_DATATYPE *)HDcalloc(1, data_size);
+        VRFY((NULL != data), "HDcalloc succeeded");
+
+        for (i = 0; i < data_size / sizeof(*data); i++)
+            data[i] = (C_DATATYPE)GEN_DATA(i);
+
+        VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, data) >= 0),
+             "Dataset write succeeded");
+
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
+        if (data)
+            HDfree(data);
+
+        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+        VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+        VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
+        VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+    }
+
+    correct_buf_size = dataset_dims[0] * dataset_dims[1] * dataset_dims[2] * sizeof(*correct_buf);
+
+    correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != correct_buf), "HDcalloc succeeded");
+
+    read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
+    VRFY((NULL != read_buf), "HDcalloc succeeded");
+
+    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
+        correct_buf[i] = (long)i; /* NOTE(review): presumes GEN_DATA(i) == i on MAINPROCESS -- confirm */
+
+    /* All ranks open the file, read the entire dataset (H5S_ALL) and verify that it is correct */
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
+    VRFY((file_id >= 0), "Test file open succeeded");
+
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
+    dset_id = H5Dopen2(group_id, WRITE_SERIAL_READ_PARALLEL_DATASET_NAME, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
+
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
+         "Dataset read succeeded");
+
+    VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+
+    if (correct_buf)
+        HDfree(correct_buf);
+    if (read_buf)
+        HDfree(read_buf);
+
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
+    VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+
+    return;
+}
+
+#ifdef H5_HAVE_PARALLEL_FILTERED_WRITES
+/*
+ * Tests parallel write of filtered data
+ * to a dataset. After the write has
+ * succeeded, the dataset is closed and
+ * then re-opened and read by a single
+ * process to ensure data correctness.
+ *
+ * Programmer: Jordan Henderson
+ *             08/03/2017
+ */
+static void
+test_write_parallel_read_serial(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id,
+                                hid_t dcpl_id, hid_t dxpl_id)
+{
+    C_DATATYPE *data        = NULL;
+    C_DATATYPE *read_buf    = NULL;
+    C_DATATYPE *correct_buf = NULL;
+    hsize_t     dataset_dims[WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS];
+    hsize_t     chunk_dims[WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS];
+    hsize_t     sel_dims[WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS];
+    hsize_t     count[WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS];
+    hsize_t     stride[WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS];
+    hsize_t     block[WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS];
+    hsize_t     offset[WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS];
+    size_t      i, data_size, correct_buf_size;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
+
+    if (MAINPROCESS)
+        HDputs("Testing write file in parallel; read serially");
+
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
+    VRFY((file_id >= 0), "Test file open succeeded");
+
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
+
+    /* Create the dataspace for the dataset */
+    dataset_dims[0] = (hsize_t)WRITE_PARALLEL_READ_SERIAL_NROWS;
+    dataset_dims[1] = (hsize_t)WRITE_PARALLEL_READ_SERIAL_NCOLS;
+    dataset_dims[2] = (hsize_t)WRITE_PARALLEL_READ_SERIAL_DEPTH;
+    chunk_dims[0]   = (hsize_t)WRITE_PARALLEL_READ_SERIAL_CH_NROWS;
+    chunk_dims[1]   = (hsize_t)WRITE_PARALLEL_READ_SERIAL_CH_NCOLS;
+    chunk_dims[2]   = 1;
+    sel_dims[0]     = (hsize_t)WRITE_PARALLEL_READ_SERIAL_CH_NROWS;
+    sel_dims[1]     = (hsize_t)WRITE_PARALLEL_READ_SERIAL_NCOLS;
+    sel_dims[2]     = (hsize_t)WRITE_PARALLEL_READ_SERIAL_DEPTH;
+
+    filespace = H5Screate_simple(WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS, dataset_dims, NULL);
+    VRFY((filespace >= 0), "File dataspace creation succeeded");
+
+    memspace = H5Screate_simple(WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS, sel_dims, NULL);
+    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
+
+    /* Create chunked dataset */
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
+
+    VRFY((H5Pset_chunk(plist_id, WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS, chunk_dims) >= 0),
+         "Chunk size set");
+
+    /* Add test filter to the pipeline */
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
+
+    dset_id = H5Dcreate2(group_id, WRITE_PARALLEL_READ_SERIAL_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
+                         H5P_DEFAULT, plist_id, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset creation succeeded");
+
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+
+    /* Each process defines the dataset selection in memory and writes
+     * it to the hyperslab in the file
+     */
+    count[0]  = 1;
+    count[1]  = (hsize_t)WRITE_PARALLEL_READ_SERIAL_NCOLS / (hsize_t)WRITE_PARALLEL_READ_SERIAL_CH_NCOLS;
+    count[2]  = (hsize_t)mpi_size;
+    stride[0] = (hsize_t)WRITE_PARALLEL_READ_SERIAL_CH_NROWS;
+    stride[1] = (hsize_t)WRITE_PARALLEL_READ_SERIAL_CH_NCOLS;
+    stride[2] = 1;
+    block[0]  = (hsize_t)WRITE_PARALLEL_READ_SERIAL_CH_NROWS;
+    block[1]  = (hsize_t)WRITE_PARALLEL_READ_SERIAL_CH_NCOLS;
+    block[2]  = 1;
+    offset[0] = ((hsize_t)mpi_rank * (hsize_t)WRITE_PARALLEL_READ_SERIAL_CH_NROWS * count[0]);
+    offset[1] = 0;
+    offset[2] = 0;
+
+    if (VERBOSE_MED) {
+        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE
+                 " ], stride[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE " ], offset[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE
+                 " ]\n",
+                 mpi_rank, count[0], count[1], count[2], stride[0], stride[1], stride[2], offset[0],
+                 offset[1], offset[2], block[0], block[1], block[2]);
+        HDfflush(stdout);
+    }
+
+    /* Select hyperslab in the file */
+    filespace = H5Dget_space(dset_id);
+    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+
+    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, stride, count, block) >= 0),
+         "Hyperslab selection succeeded");
+
+    /* Fill data buffer */
+    data_size = sel_dims[0] * sel_dims[1] * sel_dims[2] * sizeof(*data);
+
+    data = (C_DATATYPE *)HDcalloc(1, data_size);
+    VRFY((NULL != data), "HDcalloc succeeded");
 
-        correct_buf[i].field2 = (int)((i % dataset_dims[1]) + (i / dataset_dims[1]));
+    for (i = 0; i < data_size / sizeof(*data); i++)
+        data[i] = (C_DATATYPE)GEN_DATA(i);
 
-        correct_buf[i].field3 = (long)((i % dataset_dims[1]) + (i / dataset_dims[1]));
-    }
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, dxpl_id, data) >= 0),
+         "Dataset write succeeded");
 
-    /* Create the compound type for memory. */
-    memtype = H5Tcreate(H5T_COMPOUND, sizeof(COMPOUND_C_DATATYPE));
-    VRFY((memtype >= 0), "Datatype creation succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
 
-    VRFY((H5Tinsert(memtype, "ShortData", HOFFSET(COMPOUND_C_DATATYPE, field1), H5T_NATIVE_SHORT) >= 0),
-         "Datatype insertion succeeded");
-    VRFY((H5Tinsert(memtype, "IntData", HOFFSET(COMPOUND_C_DATATYPE, field2), H5T_NATIVE_INT) >= 0),
-         "Datatype insertion succeeded");
-    VRFY((H5Tinsert(memtype, "LongData", HOFFSET(COMPOUND_C_DATATYPE, field3), H5T_NATIVE_LONG) >= 0),
-         "Datatype insertion succeeded");
+    if (data)
+        HDfree(data);
+
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+    VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
+    VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     if (MAINPROCESS) {
         plist_id = H5Pcreate(H5P_FILE_ACCESS);
@@ -5153,783 +7215,794 @@ test_read_cmpd_filtered_dataset_no_conversion_unshared(void)
 
         VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
 
-        /* Create the dataspace for the dataset */
-        filespace = H5Screate_simple(READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS,
-                                     dataset_dims, NULL);
-        VRFY((filespace >= 0), "File dataspace creation succeeded");
+        group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+        VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
-        /* Create chunked dataset */
-        chunk_dims[0] = READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NROWS;
-        chunk_dims[1] = READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NCOLS;
+        dset_id = H5Dopen2(group_id, WRITE_PARALLEL_READ_SERIAL_DATASET_NAME, H5P_DEFAULT);
+        VRFY((dset_id >= 0), "Dataset open succeeded");
 
-        plist_id = H5Pcreate(H5P_DATASET_CREATE);
-        VRFY((plist_id >= 0), "DCPL creation succeeded");
+        correct_buf_size = dataset_dims[0] * dataset_dims[1] * dataset_dims[2] * sizeof(*correct_buf);
 
-        VRFY((H5Pset_chunk(plist_id, READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_DIMS,
-                           chunk_dims) >= 0),
-             "Chunk size set");
+        correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
+        VRFY((NULL != correct_buf), "HDcalloc succeeded");
 
-        /* Add test filter to the pipeline */
-        VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+        read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
+        VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-        dset_id = H5Dcreate2(file_id, READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_NAME,
-                             memtype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
-        VRFY((dset_id >= 0), "Dataset creation succeeded");
+        for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
+            correct_buf[i] = (C_DATATYPE)((i % (dataset_dims[0] * dataset_dims[1])) +
+                                          (i / (dataset_dims[0] * dataset_dims[1])));
 
-        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
-        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+        VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, read_buf) >= 0),
+             "Dataset read succeeded");
 
-        VRFY((H5Dwrite(dset_id, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0),
-             "Dataset write succeeded");
+        VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
 
         VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+        VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
         VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+
+        HDfree(correct_buf);
+        HDfree(read_buf);
     }
 
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
+    return;
+}
 
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
+/*
+ * Tests that causing chunks to continually grow and shrink
+ * by writing random data followed by zeroed-out data (and
+ * thus controlling the compression ratio) does not cause
+ * problems.
+ *
+ * Programmer: Jordan Henderson
+ *             06/04/2018
+ */
+static void
+test_shrinking_growing_chunks(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, hid_t dcpl_id,
+                              hid_t dxpl_id)
+{
+    double *data     = NULL;
+    double *read_buf = NULL;
+    hsize_t dataset_dims[SHRINKING_GROWING_CHUNKS_DATASET_DIMS];
+    hsize_t chunk_dims[SHRINKING_GROWING_CHUNKS_DATASET_DIMS];
+    hsize_t sel_dims[SHRINKING_GROWING_CHUNKS_DATASET_DIMS];
+    hsize_t start[SHRINKING_GROWING_CHUNKS_DATASET_DIMS];
+    hsize_t stride[SHRINKING_GROWING_CHUNKS_DATASET_DIMS];
+    hsize_t count[SHRINKING_GROWING_CHUNKS_DATASET_DIMS];
+    hsize_t block[SHRINKING_GROWING_CHUNKS_DATASET_DIMS];
+    size_t  i, data_size;
+    hid_t   file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t   group_id  = H5I_INVALID_HID;
+    hid_t   filespace = H5I_INVALID_HID, memspace = H5I_INVALID_HID;
 
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
+    if (MAINPROCESS)
+        HDputs("Testing continually shrinking/growing chunks");
 
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
-
-    dset_id =
-        H5Dopen2(file_id, "/" READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_DATASET_NAME, H5P_DEFAULT);
-    VRFY((dset_id >= 0), "Dataset open succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
-    sel_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NROWS;
-    sel_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_ENTRIES_PER_PROC;
+    /* Create the dataspace for the dataset */
+    dataset_dims[0] = (hsize_t)SHRINKING_GROWING_CHUNKS_NROWS;
+    dataset_dims[1] = (hsize_t)SHRINKING_GROWING_CHUNKS_NCOLS;
+    chunk_dims[0]   = (hsize_t)SHRINKING_GROWING_CHUNKS_CH_NROWS;
+    chunk_dims[1]   = (hsize_t)SHRINKING_GROWING_CHUNKS_CH_NCOLS;
+    sel_dims[0]     = (hsize_t)SHRINKING_GROWING_CHUNKS_CH_NROWS;
+    sel_dims[1]     = (hsize_t)SHRINKING_GROWING_CHUNKS_NCOLS;
 
-    /* Setup one-dimensional memory dataspace for reading the dataset data into a contiguous buffer */
-    flat_dims[0] = sel_dims[0] * sel_dims[1];
+    filespace = H5Screate_simple(SHRINKING_GROWING_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
+    VRFY((filespace >= 0), "File dataspace creation succeeded");
 
-    memspace = H5Screate_simple(1, flat_dims, NULL);
+    memspace = H5Screate_simple(SHRINKING_GROWING_CHUNKS_DATASET_DIMS, sel_dims, NULL);
     VRFY((memspace >= 0), "Memory dataspace creation succeeded");
 
-    /* Select hyperslab in the file */
-    filespace = H5Dget_space(dset_id);
-    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+    /* Create chunked dataset */
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
+
+    VRFY((H5Pset_chunk(plist_id, SHRINKING_GROWING_CHUNKS_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set");
+
+    /* Add test filter to the pipeline */
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
+
+    dset_id = H5Dcreate2(group_id, SHRINKING_GROWING_CHUNKS_DATASET_NAME, H5T_NATIVE_DOUBLE, filespace,
+                         H5P_DEFAULT, plist_id, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset creation succeeded");
+
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
     /*
-     * Each process defines the dataset selection in the file and
-     * reads it to the selection in memory
+     * Each process defines the dataset selection in memory and writes
+     * it to the hyperslab in the file
      */
     count[0]  = 1;
-    count[1]  = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_ENTRIES_PER_PROC;
-    stride[0] = READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NROWS;
-    stride[1] = READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NCOLS;
-    block[0]  = READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NROWS;
-    block[1]  = READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NCOLS;
-    start[0]  = 0;
-    start[1]  = ((hsize_t)mpi_rank * READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_UNSHARED_CH_NCOLS);
+    count[1]  = (hsize_t)SHRINKING_GROWING_CHUNKS_NCOLS / (hsize_t)SHRINKING_GROWING_CHUNKS_CH_NCOLS;
+    stride[0] = (hsize_t)SHRINKING_GROWING_CHUNKS_CH_NROWS;
+    stride[1] = (hsize_t)SHRINKING_GROWING_CHUNKS_CH_NCOLS;
+    block[0]  = (hsize_t)SHRINKING_GROWING_CHUNKS_CH_NROWS;
+    block[1]  = (hsize_t)SHRINKING_GROWING_CHUNKS_CH_NCOLS;
+    start[0]  = ((hsize_t)mpi_rank * (hsize_t)SHRINKING_GROWING_CHUNKS_CH_NROWS * count[0]);
+    start[1]  = 0;
 
     if (VERBOSE_MED) {
-        HDprintf("Process %d is reading with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
                  ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
                  ", %" PRIuHSIZE " ]\n",
                  mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
         HDfflush(stdout);
     }
 
+    /* Select hyperslab in the file (re-fetch the dataspace from the dataset first) */
+    filespace = H5Dget_space(dset_id);
+    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+
     VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
          "Hyperslab selection succeeded");
 
-    /* Create property list for collective dataset read */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
-
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
+    data_size = sel_dims[0] * sel_dims[1] * sizeof(double);
 
-    read_buf_size = flat_dims[0] * sizeof(*read_buf);
+    data = (double *)HDcalloc(1, data_size);
+    VRFY((NULL != data), "HDcalloc succeeded");
 
-    read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, read_buf_size);
+    read_buf = (double *)HDcalloc(1, data_size);
     VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-    VRFY((H5Dread(dset_id, memtype, memspace, filespace, plist_id, read_buf) >= 0), "Dataset read succeeded");
-
-    global_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != global_buf), "HDcalloc succeeded");
-
-    /* Collect each piece of data from all ranks into a global buffer on all ranks */
-    recvcounts = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
-    VRFY((NULL != recvcounts), "HDcalloc succeeded");
+    for (i = 0; i < SHRINKING_GROWING_CHUNKS_NLOOPS; i++) {
+        /* Alternate between writing random double data (even i) and zeroed-out data (odd i) */
+        if (i % 2)
+            HDmemset(data, 0, data_size);
+        else {
+            size_t j;
+            for (j = 0; j < data_size / sizeof(*data); j++) {
+                data[j] = (rand() / (double)(RAND_MAX / (double)1.0L));
+            }
+        }
 
-    for (i = 0; i < (size_t)mpi_size; i++)
-        recvcounts[i] = (int)(flat_dims[0] * sizeof(*read_buf));
+        VRFY((H5Dwrite(dset_id, H5T_NATIVE_DOUBLE, memspace, filespace, dxpl_id, data) >= 0),
+             "Dataset write succeeded");
 
-    displs = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
-    VRFY((NULL != displs), "HDcalloc succeeded");
+        /* Verify space allocation status */
+        verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
 
-    for (i = 0; i < (size_t)mpi_size; i++)
-        displs[i] = (int)(i * flat_dims[0] * sizeof(*read_buf));
+        if (i % 2) {
+            HDmemset(read_buf, 255, data_size);
+        }
+        else {
+            HDmemset(read_buf, 0, data_size);
+        }
 
-    VRFY((MPI_SUCCESS == MPI_Allgatherv(read_buf, (int)(flat_dims[0] * sizeof(COMPOUND_C_DATATYPE)), MPI_BYTE,
-                                        global_buf, recvcounts, displs, MPI_BYTE, comm)),
-         "MPI_Allgatherv succeeded");
+        VRFY((H5Dread(dset_id, H5T_NATIVE_DOUBLE, memspace, filespace, dxpl_id, read_buf) >= 0),
+             "Dataset read succeeded");
 
-    VRFY((0 == HDmemcmp(global_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+        VRFY((0 == HDmemcmp(read_buf, data, data_size)), "data verification succeeded");
+    }
 
-    if (displs)
-        HDfree(displs);
-    if (recvcounts)
-        HDfree(recvcounts);
-    if (global_buf)
-        HDfree(global_buf);
     if (read_buf)
         HDfree(read_buf);
-    if (correct_buf)
-        HDfree(correct_buf);
+    if (data)
+        HDfree(data);
 
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
     VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Tclose(memtype) >= 0), "Memory datatype close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
 
 /*
- * Tests parallel read of filtered data from shared
- * chunks using a compound datatype which doesn't
- * require a datatype conversion.
- *
- * The MAINPROCESS rank will first write out all of the
- * data to the dataset. Then, each rank reads a part of
- * each chunk of the dataset and contributes its piece
- * to a global buffer that is checked for consistency.
+ * Tests that filtered and unfiltered partial edge chunks can be
+ * written to and read from correctly in parallel when only one MPI
+ * rank writes to a particular partial edge chunk in the dataset.
  *
- * Programmer: Jordan Henderson
- *             05/17/2018
+ * The dataset contains partial edge chunks in the second dimension.
+ * Each MPI rank selects a hyperslab in the shape of a single chunk
+ * that is offset to cover the whole edge chunk and part of the
+ * full chunk next to the edge chunk.
  */
 static void
-test_read_cmpd_filtered_dataset_no_conversion_shared(void)
+test_edge_chunks_no_overlap(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, hid_t dcpl_id,
+                            hid_t dxpl_id)
 {
-    COMPOUND_C_DATATYPE *read_buf    = NULL;
-    COMPOUND_C_DATATYPE *correct_buf = NULL;
-    COMPOUND_C_DATATYPE *global_buf  = NULL;
-    hsize_t              dataset_dims[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              chunk_dims[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              sel_dims[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              start[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              stride[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              count[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              block[READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              flat_dims[1];
-    size_t               i, read_buf_size, correct_buf_size;
-    hid_t                file_id, dset_id, plist_id, memtype;
-    hid_t                filespace, memspace;
-    int *                recvcounts = NULL;
-    int *                displs     = NULL;
+    C_DATATYPE *data     = NULL;
+    C_DATATYPE *read_buf = NULL;
+    hsize_t     dataset_dims[WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS];
+    hsize_t     chunk_dims[WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS];
+    hsize_t     sel_dims[WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS];
+    hsize_t     start[WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS];
+    hsize_t     stride[WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS];
+    hsize_t     count[WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS];
+    hsize_t     block[WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS];
+    size_t      i, data_size;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
-        HDputs("Testing read from shared filtered chunks in Compound Datatype dataset without Datatype "
-               "conversion");
-
-    CHECK_CUR_FILTER_AVAIL();
-
-    dataset_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_NROWS;
-    dataset_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_NCOLS;
-
-    /* Setup the buffer for writing and for comparison */
-    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
-
-    correct_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != correct_buf), "HDcalloc succeeded");
+        HDputs("Testing write to unshared filtered edge chunks");
 
-    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++) {
-        correct_buf[i].field1 =
-            (short)((dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
-                    (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
+    VRFY((file_id >= 0), "Test file open succeeded");
 
-        correct_buf[i].field2 =
-            (int)((dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
-                  (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
-        correct_buf[i].field3 =
-            (long)((dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
-                   (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
-    }
+    /* Create the dataspace for the dataset */
+    dataset_dims[0] = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_NROWS;
+    dataset_dims[1] = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_NCOLS;
+    chunk_dims[0]   = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NROWS;
+    chunk_dims[1]   = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS;
+    sel_dims[0]     = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NROWS;
+    sel_dims[1]     = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS;
+
+    filespace = H5Screate_simple(WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
+    VRFY((filespace >= 0), "File dataspace creation succeeded");
 
-    /* Create the compound type for memory. */
-    memtype = H5Tcreate(H5T_COMPOUND, sizeof(COMPOUND_C_DATATYPE));
-    VRFY((memtype >= 0), "Datatype creation succeeded");
+    /* Create chunked dataset */
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-    VRFY((H5Tinsert(memtype, "ShortData", HOFFSET(COMPOUND_C_DATATYPE, field1), H5T_NATIVE_SHORT) >= 0),
-         "Datatype insertion succeeded");
-    VRFY((H5Tinsert(memtype, "IntData", HOFFSET(COMPOUND_C_DATATYPE, field2), H5T_NATIVE_INT) >= 0),
-         "Datatype insertion succeeded");
-    VRFY((H5Tinsert(memtype, "LongData", HOFFSET(COMPOUND_C_DATATYPE, field3), H5T_NATIVE_LONG) >= 0),
-         "Datatype insertion succeeded");
+    VRFY((H5Pset_chunk(plist_id, WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
+         "Chunk size set");
 
-    if (MAINPROCESS) {
-        plist_id = H5Pcreate(H5P_FILE_ACCESS);
-        VRFY((plist_id >= 0), "FAPL creation succeeded");
+    /* Add test filter to the pipeline */
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-        VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-             "Set libver bounds succeeded");
+    dset_id = H5Dcreate2(group_id, WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME,
+                         filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-        file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
-        VRFY((file_id >= 0), "Test file open succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
 
-        VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
-        /* Create the dataspace for the dataset */
-        filespace = H5Screate_simple(READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS,
-                                     dataset_dims, NULL);
-        VRFY((filespace >= 0), "File dataspace creation succeeded");
+    /* Each process defines the dataset selection in memory and writes
+     * it to the hyperslab in the file
+     */
+    count[0]  = 1;
+    count[1]  = 1;
+    stride[0] = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NROWS;
+    stride[1] = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS;
+    block[0]  = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NROWS;
+    block[1]  = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS;
+    start[0]  = ((hsize_t)mpi_rank * (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NROWS);
+    start[1] =
+        (hsize_t)(WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_NCOLS - WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS);
 
-        /* Create chunked dataset */
-        chunk_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NROWS;
-        chunk_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NCOLS;
+    if (VERBOSE_MED) {
+        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ]\n",
+                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
+        HDfflush(stdout);
+    }
 
-        plist_id = H5Pcreate(H5P_DATASET_CREATE);
-        VRFY((plist_id >= 0), "DCPL creation succeeded");
+    /* Select hyperslab in the file */
+    filespace = H5Dget_space(dset_id);
+    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
 
-        VRFY((H5Pset_chunk(plist_id, READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_DIMS,
-                           chunk_dims) >= 0),
-             "Chunk size set");
+    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+         "Hyperslab selection succeeded");
 
-        /* Add test filter to the pipeline */
-        VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+    /* Fill data buffer */
+    data_size = sel_dims[0] * sel_dims[1] * sizeof(*data);
 
-        dset_id = H5Dcreate2(file_id, READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_NAME,
-                             memtype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
-        VRFY((dset_id >= 0), "Dataset creation succeeded");
+    data = (C_DATATYPE *)HDcalloc(1, data_size);
+    VRFY((NULL != data), "HDcalloc succeeded");
 
-        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
-        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+    read_buf = (C_DATATYPE *)HDcalloc(1, data_size);
+    VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-        VRFY((H5Dwrite(dset_id, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0),
-             "Dataset write succeeded");
+    for (i = 0; i < data_size / sizeof(*data); i++)
+        data[i] = (C_DATATYPE)GEN_DATA(i);
 
-        VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
-        VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
-    }
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0),
+         "Dataset write succeeded");
 
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, (mpi_size > 1) ? SOME_CHUNKS_WRITTEN : ALL_CHUNKS_WRITTEN);
 
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
+    /* Verify the correct data was written */
+    dset_id = H5Dopen2(group_id, WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id);
-    VRFY((file_id >= 0), "Test file open succeeded");
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, read_buf) >= 0),
+         "Dataset read succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    VRFY((0 == HDmemcmp(read_buf, data, data_size)), "Data verification succeeded");
 
-    dset_id =
-        H5Dopen2(file_id, "/" READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_DATASET_NAME, H5P_DEFAULT);
-    VRFY((dset_id >= 0), "Dataset open succeeded");
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
-    sel_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NROWS / (hsize_t)mpi_size;
-    sel_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_ENTRIES_PER_PROC;
+    /* Repeat the previous, but set option to not filter partial edge chunks */
+    if (MAINPROCESS)
+        HDputs("Testing write to unshared unfiltered edge chunks");
 
-    /* Setup one-dimensional memory dataspace for reading the dataset data into a contiguous buffer */
-    flat_dims[0] = sel_dims[0] * sel_dims[1];
+    VRFY((H5Pset_chunk_opts(plist_id, H5D_CHUNK_DONT_FILTER_PARTIAL_CHUNKS) >= 0), "Chunk options set");
 
-    memspace = H5Screate_simple(1, flat_dims, NULL);
-    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
+    dset_id = H5Dcreate2(group_id, WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME2, HDF5_DATATYPE_NAME,
+                         filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    /* Select hyperslab in the file */
-    filespace = H5Dget_space(dset_id);
-    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
 
-    /*
-     * Each process defines the dataset selection in the file and
-     * reads it to the selection in memory
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+
+    /* Each process defines the dataset selection in memory and writes
+     * it to the hyperslab in the file
      */
     count[0]  = 1;
-    count[1]  = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_ENTRIES_PER_PROC;
-    stride[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NROWS;
-    stride[1] = READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NCOLS;
-    block[0]  = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NROWS / (hsize_t)mpi_size;
-    block[1]  = READ_COMPOUND_FILTERED_CHUNKS_NO_CONVERSION_SHARED_CH_NCOLS;
-    start[0]  = (hsize_t)mpi_rank;
-    start[1]  = 0;
+    count[1]  = 1;
+    stride[0] = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NROWS;
+    stride[1] = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS;
+    block[0]  = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NROWS;
+    block[1]  = (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS;
+    start[0]  = ((hsize_t)mpi_rank * (hsize_t)WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NROWS);
+    start[1] =
+        (hsize_t)(WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_NCOLS - WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS);
 
     if (VERBOSE_MED) {
-        HDprintf("Process %d is reading with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
                  ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
                  ", %" PRIuHSIZE " ]\n",
                  mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
         HDfflush(stdout);
     }
 
+    /* Select hyperslab in the file */
+    filespace = H5Dget_space(dset_id);
+    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+
     VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
          "Hyperslab selection succeeded");
 
-    /* Create property list for collective dataset read */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
-
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
-
-    read_buf_size = flat_dims[0] * sizeof(*read_buf);
-
-    read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, read_buf_size);
-    VRFY((NULL != read_buf), "HDcalloc succeeded");
-
-    VRFY((H5Dread(dset_id, memtype, memspace, filespace, plist_id, read_buf) >= 0), "Dataset read succeeded");
-
-    global_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != global_buf), "HDcalloc succeeded");
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0),
+         "Dataset write succeeded");
 
-    /* Collect each piece of data from all ranks into a global buffer on all ranks */
-    recvcounts = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
-    VRFY((NULL != recvcounts), "HDcalloc succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, (mpi_size > 1) ? SOME_CHUNKS_WRITTEN : ALL_CHUNKS_WRITTEN);
 
-    for (i = 0; i < (size_t)mpi_size; i++)
-        recvcounts[i] = (int)(flat_dims[0] * sizeof(*read_buf));
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
-    displs = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
-    VRFY((NULL != displs), "HDcalloc succeeded");
+    /* Verify the correct data was written */
+    dset_id = H5Dopen2(group_id, WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME2, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    for (i = 0; i < (size_t)mpi_size; i++)
-        displs[i] = (int)(i * flat_dims[0] * sizeof(*read_buf));
+    HDmemset(read_buf, 255, data_size);
 
-    VRFY((MPI_SUCCESS == MPI_Allgatherv(read_buf, (int)(flat_dims[0] * sizeof(COMPOUND_C_DATATYPE)), MPI_BYTE,
-                                        global_buf, recvcounts, displs, MPI_BYTE, comm)),
-         "MPI_Allgatherv succeeded");
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, read_buf) >= 0),
+         "Dataset read succeeded");
 
-    VRFY((0 == HDmemcmp(global_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+    VRFY((0 == HDmemcmp(read_buf, data, data_size)), "Data verification succeeded");
 
-    if (displs)
-        HDfree(displs);
-    if (recvcounts)
-        HDfree(recvcounts);
-    if (global_buf)
-        HDfree(global_buf);
+    if (data)
+        HDfree(data);
     if (read_buf)
         HDfree(read_buf);
-    if (correct_buf)
-        HDfree(correct_buf);
 
-    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
-    VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Tclose(memtype) >= 0), "Memory datatype close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
 
 /*
- * Tests parallel read of filtered data from unshared
- * chunks using a compound datatype which requires a
- * datatype conversion.
- *
- * The MAINPROCESS rank will first write out all of the
- * data to the dataset. Then, each rank reads a part of
- * the dataset and contributes its piece to a global
- * buffer that is checked for consistency.
+ * Tests that filtered and unfiltered partial edge chunks can be
+ * written to and read from correctly in parallel when every MPI
+ * rank writes to every partial edge chunk in the dataset.
  *
- * Programmer: Jordan Henderson
- *             05/17/2018
+ * The dataset contains partial edge chunks in the second dimension.
+ * Each MPI rank selects one row of each chunk, offset in the second dimension
+ * to cover the whole edge chunk and part of the adjacent full chunk. Runs once
+ * with filtered edge chunks, then with H5D_CHUNK_DONT_FILTER_PARTIAL_CHUNKS set.
  */
 static void
-test_read_cmpd_filtered_dataset_type_conversion_unshared(void)
+test_edge_chunks_overlap(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, hid_t dcpl_id,
+                         hid_t dxpl_id)
 {
-    COMPOUND_C_DATATYPE *read_buf    = NULL;
-    COMPOUND_C_DATATYPE *correct_buf = NULL;
-    COMPOUND_C_DATATYPE *global_buf  = NULL;
-    hsize_t              dataset_dims[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              chunk_dims[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              sel_dims[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              start[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              stride[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              count[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              block[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS];
-    hsize_t              flat_dims[1];
-    size_t               i, read_buf_size, correct_buf_size;
-    hid_t                file_id = -1, dset_id = -1, plist_id = -1, filetype = -1, memtype = -1;
-    hid_t                filespace = -1, memspace = -1;
-    int *                recvcounts = NULL;
-    int *                displs     = NULL;
+    C_DATATYPE *data     = NULL;
+    C_DATATYPE *read_buf = NULL;
+    hsize_t     dataset_dims[WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS];
+    hsize_t     chunk_dims[WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS];
+    hsize_t     sel_dims[WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS];
+    hsize_t     start[WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS];
+    hsize_t     stride[WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS];
+    hsize_t     count[WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS];
+    hsize_t     block[WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS];
+    size_t      i, data_size;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id  = H5I_INVALID_HID;
+    hid_t       filespace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
-        HDputs("Testing read from unshared filtered chunks in Compound Datatype dataset with Datatype "
-               "conversion");
-
-    CHECK_CUR_FILTER_AVAIL();
-
-    dataset_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_NROWS;
-    dataset_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_NCOLS;
-
-    /* Setup the buffer for writing and for comparison */
-    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
-
-    correct_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != correct_buf), "HDcalloc succeeded");
+        HDputs("Testing write to shared filtered edge chunks");
 
-    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++) {
-        correct_buf[i].field1 = (short)((i % dataset_dims[1]) + (i / dataset_dims[1]));
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
+    VRFY((file_id >= 0), "Test file open succeeded");
 
-        correct_buf[i].field2 = (int)((i % dataset_dims[1]) + (i / dataset_dims[1]));
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
-        correct_buf[i].field3 = (long)((i % dataset_dims[1]) + (i / dataset_dims[1]));
-    }
+    /* Create the dataspace for the dataset */
+    dataset_dims[0] = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_NROWS;
+    dataset_dims[1] = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_NCOLS;
+    chunk_dims[0]   = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NROWS;
+    chunk_dims[1]   = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS;
+    sel_dims[0]     = (hsize_t)DIM0_SCALE_FACTOR;
+    sel_dims[1]     = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS;
 
-    /* Create the compound type for memory. */
-    memtype = H5Tcreate(H5T_COMPOUND, sizeof(COMPOUND_C_DATATYPE));
-    VRFY((memtype >= 0), "Datatype creation succeeded");
+    filespace = H5Screate_simple(WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
+    VRFY((filespace >= 0), "File dataspace creation succeeded");
 
-    VRFY((H5Tinsert(memtype, "ShortData", HOFFSET(COMPOUND_C_DATATYPE, field1), H5T_NATIVE_SHORT) >= 0),
-         "Datatype insertion succeeded");
-    VRFY((H5Tinsert(memtype, "IntData", HOFFSET(COMPOUND_C_DATATYPE, field2), H5T_NATIVE_INT) >= 0),
-         "Datatype insertion succeeded");
-    VRFY((H5Tinsert(memtype, "LongData", HOFFSET(COMPOUND_C_DATATYPE, field3), H5T_NATIVE_LONG) >= 0),
-         "Datatype insertion succeeded");
+    /* Create chunked dataset */
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-    /* Create the compound type for file. */
-    filetype = H5Tcreate(H5T_COMPOUND, 32);
-    VRFY((filetype >= 0), "Datatype creation succeeded");
+    VRFY((H5Pset_chunk(plist_id, WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS, chunk_dims) >= 0),
+         "Chunk size set");
 
-    VRFY((H5Tinsert(filetype, "ShortData", 0, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
-    VRFY((H5Tinsert(filetype, "IntData", 8, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
-    VRFY((H5Tinsert(filetype, "LongData", 16, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
+    /* Add test filter to the pipeline */
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-    if (MAINPROCESS) {
-        plist_id = H5Pcreate(H5P_FILE_ACCESS);
-        VRFY((plist_id >= 0), "FAPL creation succeeded");
+    dset_id = H5Dcreate2(group_id, WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME, HDF5_DATATYPE_NAME,
+                         filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-        VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-             "Set libver bounds succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
 
-        file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
-        VRFY((file_id >= 0), "Test file open succeeded");
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
-        VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    /* Each rank selects a 1 x CH_NCOLS block in every chunk row, beginning at row
+     * mpi_rank and starting CH_NCOLS columns before the end of the second dimension
+     */
+    count[0] =
+        (hsize_t)(WRITE_SHARED_FILTERED_EDGE_CHUNKS_NROWS / WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NROWS);
+    count[1]  = 1;
+    stride[0] = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NROWS;
+    stride[1] = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS;
+    block[0]  = (hsize_t)1;
+    block[1]  = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS;
+    start[0]  = (hsize_t)mpi_rank;
+    start[1] =
+        (hsize_t)(WRITE_SHARED_FILTERED_EDGE_CHUNKS_NCOLS - WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS);
 
-        /* Create the dataspace for the dataset */
-        filespace = H5Screate_simple(READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS,
-                                     dataset_dims, NULL);
-        VRFY((filespace >= 0), "File dataspace creation succeeded");
+    if (VERBOSE_MED) {
+        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ]\n",
+                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
+        HDfflush(stdout);
+    }
 
-        /* Create chunked dataset */
-        chunk_dims[0] = READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NROWS;
-        chunk_dims[1] = READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NCOLS;
+    /* Select hyperslab in the file */
+    filespace = H5Dget_space(dset_id);
+    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
 
-        plist_id = H5Pcreate(H5P_DATASET_CREATE);
-        VRFY((plist_id >= 0), "DCPL creation succeeded");
+    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+         "Hyperslab selection succeeded");
 
-        VRFY((H5Pset_chunk(plist_id, READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_DIMS,
-                           chunk_dims) >= 0),
-             "Chunk size set");
+    /* Fill data buffer */
+    data_size = sel_dims[0] * sel_dims[1] * sizeof(*data);
 
-        /* Add test filter to the pipeline */
-        VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+    data = (C_DATATYPE *)HDcalloc(1, data_size);
+    VRFY((NULL != data), "HDcalloc succeeded");
 
-        dset_id = H5Dcreate2(file_id, READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_NAME,
-                             filetype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
-        VRFY((dset_id >= 0), "Dataset creation succeeded");
+    read_buf = (C_DATATYPE *)HDcalloc(1, data_size);
+    VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
-        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+    for (i = 0; i < data_size / sizeof(*data); i++)
+        data[i] = (C_DATATYPE)GEN_DATA(i);
 
-        VRFY((H5Dwrite(dset_id, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0),
-             "Dataset write succeeded");
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0),
+         "Dataset write succeeded");
 
-        VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
-        VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
-    }
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, SOME_CHUNKS_WRITTEN);
 
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
+    /* Verify the correct data was written */
+    dset_id = H5Dopen2(group_id, WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, read_buf) >= 0),
+         "Dataset read succeeded");
 
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id);
-    VRFY((file_id >= 0), "Test file open succeeded");
+    VRFY((0 == HDmemcmp(read_buf, data, data_size)), "Data verification succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_DATASET_NAME,
-                       H5P_DEFAULT);
-    VRFY((dset_id >= 0), "Dataset open succeeded");
+    /* Repeat the previous, but set option to not filter partial edge chunks */
+    if (MAINPROCESS)
+        HDputs("Testing write to shared unfiltered edge chunks");
 
-    sel_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NROWS;
-    sel_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_ENTRIES_PER_PROC;
+    VRFY((H5Pset_chunk_opts(plist_id, H5D_CHUNK_DONT_FILTER_PARTIAL_CHUNKS) >= 0), "Chunk opts set");
 
-    /* Setup one-dimensional memory dataspace for reading the dataset data into a contiguous buffer */
-    flat_dims[0] = sel_dims[0] * sel_dims[1];
+    dset_id = H5Dcreate2(group_id, WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME2, HDF5_DATATYPE_NAME,
+                         filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    memspace = H5Screate_simple(1, flat_dims, NULL);
-    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
 
-    /* Select hyperslab in the file */
-    filespace = H5Dget_space(dset_id);
-    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
-    /*
-     * Each process defines the dataset selection in the file and
-     * reads it to the selection in memory
+    /* Rebuild the same per-rank selection for the second dataset: one 1 x CH_NCOLS
+     * block in every chunk row, starting CH_NCOLS columns before the end of dim 1
      */
-    count[0]  = 1;
-    count[1]  = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_ENTRIES_PER_PROC;
-    stride[0] = READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NROWS;
-    stride[1] = READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NCOLS;
-    block[0]  = READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NROWS;
-    block[1]  = READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NCOLS;
-    start[0]  = 0;
-    start[1]  = ((hsize_t)mpi_rank * READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_UNSHARED_CH_NCOLS);
+    count[0] =
+        (hsize_t)(WRITE_SHARED_FILTERED_EDGE_CHUNKS_NROWS / WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NROWS);
+    count[1]  = 1;
+    stride[0] = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NROWS;
+    stride[1] = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS;
+    block[0]  = (hsize_t)1;
+    block[1]  = (hsize_t)WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS;
+    start[0]  = (hsize_t)mpi_rank;
+    start[1] =
+        (hsize_t)(WRITE_SHARED_FILTERED_EDGE_CHUNKS_NCOLS - WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS);
 
     if (VERBOSE_MED) {
-        HDprintf("Process %d is reading with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
                  ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
                  ", %" PRIuHSIZE " ]\n",
                  mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
         HDfflush(stdout);
     }
 
+    /* Select hyperslab in the file */
+    filespace = H5Dget_space(dset_id);
+    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+
     VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
          "Hyperslab selection succeeded");
 
-    /* Create property list for collective dataset read */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
-
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
-
-    read_buf_size = flat_dims[0] * sizeof(*read_buf);
-
-    read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, read_buf_size);
-    VRFY((NULL != read_buf), "HDcalloc succeeded");
-
-    VRFY((H5Dread(dset_id, memtype, memspace, filespace, plist_id, read_buf) >= 0), "Dataset read succeeded");
-
-    global_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != global_buf), "HDcalloc succeeded");
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0),
+         "Dataset write succeeded");
 
-    /* Collect each piece of data from all ranks into a global buffer on all ranks */
-    recvcounts = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
-    VRFY((NULL != recvcounts), "HDcalloc succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, SOME_CHUNKS_WRITTEN);
 
-    for (i = 0; i < (size_t)mpi_size; i++)
-        recvcounts[i] = (int)(flat_dims[0] * sizeof(*read_buf));
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
-    displs = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
-    VRFY((NULL != displs), "HDcalloc succeeded");
+    /* Verify the correct data was written */
+    dset_id = H5Dopen2(group_id, WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME2, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    for (i = 0; i < (size_t)mpi_size; i++)
-        displs[i] = (int)(i * flat_dims[0] * sizeof(*read_buf));
+    HDmemset(read_buf, 255, data_size);
 
-    VRFY((MPI_SUCCESS == MPI_Allgatherv(read_buf, (int)(flat_dims[0] * sizeof(COMPOUND_C_DATATYPE)), MPI_BYTE,
-                                        global_buf, recvcounts, displs, MPI_BYTE, comm)),
-         "MPI_Allgatherv succeeded");
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, read_buf) >= 0),
+         "Dataset read succeeded");
 
-    VRFY((0 == HDmemcmp(global_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+    VRFY((0 == HDmemcmp(read_buf, data, data_size)), "Data verification succeeded");
 
-    if (displs)
-        HDfree(displs);
-    if (recvcounts)
-        HDfree(recvcounts);
-    if (global_buf)
-        HDfree(global_buf);
+    if (data)
+        HDfree(data);
     if (read_buf)
         HDfree(read_buf);
-    if (correct_buf)
-        HDfree(correct_buf);
 
-    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
-    VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Tclose(filetype) >= 0), "File datatype close succeeded");
-    VRFY((H5Tclose(memtype) >= 0), "Memory datatype close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
 
 /*
- * Tests parallel read of filtered data from shared
- * chunks using a compound datatype which requires
- * a datatype conversion.
- *
- * The MAINPROCESS rank will first write out all of the
- * data to the dataset. Then, each rank reads a part of
- * each chunk of the dataset and contributes its pieces
- * to a global buffer that is checked for consistency.
+ * Placeholder (not yet implemented) for a test that filtered and
+ * unfiltered partial edge chunks can be written to and read from
+ * correctly in parallel when only one MPI rank writes to a particular
+ * edge chunk in the dataset and only performs a partial write to it.
  *
- * Programmer: Jordan Henderson
- *             05/17/2018
+ * The dataset is meant to contain partial edge chunks in the second
+ * dimension, with each MPI rank selecting a hyperslab that covers just
+ * a portion of a single edge chunk. The body is currently empty.
  */
 static void
-test_read_cmpd_filtered_dataset_type_conversion_shared(void)
+test_edge_chunks_partial_write(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, hid_t dcpl_id,
+                               hid_t dxpl_id)
 {
-    COMPOUND_C_DATATYPE *read_buf    = NULL;
-    COMPOUND_C_DATATYPE *correct_buf = NULL;
-    COMPOUND_C_DATATYPE *global_buf  = NULL;
-    hsize_t              dataset_dims[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              chunk_dims[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              sel_dims[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              start[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              stride[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              count[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              block[READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS];
-    hsize_t              flat_dims[1];
-    size_t               i, read_buf_size, correct_buf_size;
-    hid_t                file_id, dset_id, plist_id, filetype, memtype;
-    hid_t                filespace, memspace;
-    int *                recvcounts = NULL;
-    int *                displs     = NULL;
+    /* TODO: not implemented yet; this test is a no-op and all of its parameters are unused */
+}
+
+/*
+ * Tests that the parallel compression feature correctly handles
+ * writing fill values to a dataset and reading fill values from
+ * unallocated parts of a dataset.
+ */
+static void
+test_fill_values(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, hid_t dcpl_id,
+                 hid_t dxpl_id)
+{
+    C_DATATYPE *data        = NULL;
+    C_DATATYPE *read_buf    = NULL;
+    C_DATATYPE *correct_buf = NULL;
+    C_DATATYPE  fill_value;
+    hsize_t     dataset_dims[FILL_VALUES_TEST_DATASET_DIMS];
+    hsize_t     chunk_dims[FILL_VALUES_TEST_DATASET_DIMS];
+    hsize_t     sel_dims[FILL_VALUES_TEST_DATASET_DIMS];
+    hsize_t     start[FILL_VALUES_TEST_DATASET_DIMS];
+    hsize_t     stride[FILL_VALUES_TEST_DATASET_DIMS];
+    hsize_t     count[FILL_VALUES_TEST_DATASET_DIMS];
+    hsize_t     block[FILL_VALUES_TEST_DATASET_DIMS];
+    size_t      i, data_size, read_buf_size;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id   = H5I_INVALID_HID;
+    hid_t       filespace  = H5I_INVALID_HID;
+    int *       recvcounts = NULL;
+    int *       displs     = NULL;
 
     if (MAINPROCESS)
-        HDputs(
-            "Testing read from shared filtered chunks in Compound Datatype dataset with Datatype conversion");
+        HDputs("Testing fill values");
 
-    CHECK_CUR_FILTER_AVAIL();
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
+    VRFY((file_id >= 0), "Test file open succeeded");
 
-    dataset_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_NROWS;
-    dataset_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_NCOLS;
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
-    /* Setup the buffer for writing and for comparison */
-    correct_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*correct_buf);
+    /* Create the dataspace for the dataset */
+    dataset_dims[0] = (hsize_t)FILL_VALUES_TEST_NROWS;
+    dataset_dims[1] = (hsize_t)FILL_VALUES_TEST_NCOLS;
+    chunk_dims[0]   = (hsize_t)FILL_VALUES_TEST_CH_NROWS;
+    chunk_dims[1]   = (hsize_t)FILL_VALUES_TEST_CH_NCOLS;
+    sel_dims[0]     = (hsize_t)DIM0_SCALE_FACTOR;
+    sel_dims[1]     = (hsize_t)FILL_VALUES_TEST_CH_NCOLS * (hsize_t)DIM1_SCALE_FACTOR;
 
-    correct_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != correct_buf), "HDcalloc succeeded");
+    filespace = H5Screate_simple(FILL_VALUES_TEST_DATASET_DIMS, dataset_dims, NULL);
+    VRFY((filespace >= 0), "File dataspace creation succeeded");
 
-    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++) {
-        correct_buf[i].field1 =
-            (short)((dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
-                    (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
+    /* Create chunked dataset */
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-        correct_buf[i].field2 =
-            (int)((dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
-                  (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
+    VRFY((H5Pset_chunk(plist_id, FILL_VALUES_TEST_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set");
 
-        correct_buf[i].field3 =
-            (long)((dataset_dims[1] * (i / ((hsize_t)mpi_size * dataset_dims[1]))) + (i % dataset_dims[1]) +
-                   (((i % ((hsize_t)mpi_size * dataset_dims[1])) / dataset_dims[1]) % dataset_dims[1]));
-    }
+    /* Add test filter to the pipeline */
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-    /* Create the compound type for memory. */
-    memtype = H5Tcreate(H5T_COMPOUND, sizeof(COMPOUND_C_DATATYPE));
-    VRFY((memtype >= 0), "Datatype creation succeeded");
+    /* Set a fill value */
+    fill_value = FILL_VALUES_TEST_FILL_VAL;
+    VRFY((H5Pset_fill_value(plist_id, HDF5_DATATYPE_NAME, &fill_value) >= 0), "Fill Value set");
 
-    VRFY((H5Tinsert(memtype, "ShortData", HOFFSET(COMPOUND_C_DATATYPE, field1), H5T_NATIVE_SHORT) >= 0),
-         "Datatype insertion succeeded");
-    VRFY((H5Tinsert(memtype, "IntData", HOFFSET(COMPOUND_C_DATATYPE, field2), H5T_NATIVE_INT) >= 0),
-         "Datatype insertion succeeded");
-    VRFY((H5Tinsert(memtype, "LongData", HOFFSET(COMPOUND_C_DATATYPE, field3), H5T_NATIVE_LONG) >= 0),
-         "Datatype insertion succeeded");
+    dset_id = H5Dcreate2(group_id, FILL_VALUES_TEST_DATASET_NAME, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT,
+                         plist_id, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    /* Create the compound type for file. */
-    filetype = H5Tcreate(H5T_COMPOUND, 32);
-    VRFY((filetype >= 0), "Datatype creation succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
 
-    VRFY((H5Tinsert(filetype, "ShortData", 0, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
-    VRFY((H5Tinsert(filetype, "IntData", 8, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
-    VRFY((H5Tinsert(filetype, "LongData", 16, H5T_STD_I64BE) >= 0), "Datatype insertion succeeded");
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
-    if (MAINPROCESS) {
-        plist_id = H5Pcreate(H5P_FILE_ACCESS);
-        VRFY((plist_id >= 0), "FAPL creation succeeded");
+    /* Allocate buffer for reading entire dataset */
+    read_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*read_buf);
 
-        VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-             "Set libver bounds succeeded");
+    read_buf = HDcalloc(1, read_buf_size);
+    VRFY((NULL != read_buf), "HDcalloc succeeded");
 
-        file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
-        VRFY((file_id >= 0), "Test file open succeeded");
+    correct_buf = HDcalloc(1, read_buf_size);
+    VRFY((NULL != correct_buf), "HDcalloc succeeded");
 
-        VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    /* Read entire dataset and verify that the fill value is returned */
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
+         "Dataset read succeeded");
 
-        /* Create the dataspace for the dataset */
-        filespace = H5Screate_simple(READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS,
-                                     dataset_dims, NULL);
-        VRFY((filespace >= 0), "File dataspace creation succeeded");
+    for (i = 0; i < read_buf_size / sizeof(*read_buf); i++)
+        correct_buf[i] = FILL_VALUES_TEST_FILL_VAL;
 
-        /* Create chunked dataset */
-        chunk_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NROWS;
-        chunk_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NCOLS;
+    VRFY((0 == HDmemcmp(read_buf, correct_buf, read_buf_size)), "Data verification succeeded");
 
-        plist_id = H5Pcreate(H5P_DATASET_CREATE);
-        VRFY((plist_id >= 0), "DCPL creation succeeded");
+    /*
+     * Write to part of the first chunk in the dataset with
+     * all ranks, then read the whole dataset and ensure that
+     * the fill value is returned for the unwritten part of
+     * the chunk, as well as for the rest of the dataset that
+     * hasn't been written to yet.
+     */
+    count[0]  = 1;
+    count[1]  = 1;
+    stride[0] = (hsize_t)FILL_VALUES_TEST_CH_NROWS;
+    stride[1] = (hsize_t)FILL_VALUES_TEST_CH_NCOLS;
+    block[0]  = 1;
+    block[1]  = (hsize_t)(FILL_VALUES_TEST_CH_NCOLS - 1);
+    start[0]  = (hsize_t)mpi_rank;
+    start[1]  = 0;
 
-        VRFY((H5Pset_chunk(plist_id, READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_DIMS,
-                           chunk_dims) >= 0),
-             "Chunk size set");
+    if (VERBOSE_MED) {
+        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ]\n",
+                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
+        HDfflush(stdout);
+    }
 
-        /* Add test filter to the pipeline */
-        VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+    /* Select hyperslab in the file */
+    filespace = H5Dget_space(dset_id);
+    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
 
-        dset_id = H5Dcreate2(file_id, READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_NAME,
-                             filetype, filespace, H5P_DEFAULT, plist_id, H5P_DEFAULT);
-        VRFY((dset_id >= 0), "Dataset creation succeeded");
+    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+         "Hyperslab selection succeeded");
 
-        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
-        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+    /* Fill data buffer */
+    data_size = sel_dims[0] * sel_dims[1] * sizeof(*data);
 
-        VRFY((H5Dwrite(dset_id, memtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, correct_buf) >= 0),
-             "Dataset write succeeded");
+    data = (C_DATATYPE *)HDcalloc(1, data_size);
+    VRFY((NULL != data), "HDcalloc succeeded");
 
-        VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
-        VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
-    }
+    for (i = 0; i < data_size / sizeof(*data); i++)
+        data[i] = (C_DATATYPE)GEN_DATA(i);
 
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0),
+         "Dataset write succeeded");
 
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, SOME_CHUNKS_WRITTEN);
 
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDONLY, plist_id);
-    VRFY((file_id >= 0), "Test file open succeeded");
+    /* Verify correct data was written */
+    dset_id = H5Dopen2(group_id, FILL_VALUES_TEST_DATASET_NAME, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
+         "Dataset read succeeded");
 
-    dset_id =
-        H5Dopen2(file_id, "/" READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_DATASET_NAME, H5P_DEFAULT);
-    VRFY((dset_id >= 0), "Dataset open succeeded");
+    /*
+     * Each MPI rank communicates their written piece of data
+     * into each other rank's correctness-checking buffer
+     */
+    recvcounts = HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
+    VRFY((NULL != recvcounts), "HDcalloc succeeded");
 
-    sel_dims[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NROWS / (hsize_t)mpi_size;
-    sel_dims[1] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_ENTRIES_PER_PROC;
+    displs = HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
+    VRFY((NULL != displs), "HDcalloc succeeded");
 
-    /* Setup one-dimensional memory dataspace for reading the dataset data into a contiguous buffer */
-    flat_dims[0] = sel_dims[0] * sel_dims[1];
+    for (i = 0; i < (size_t)mpi_size; i++) {
+        recvcounts[i] = (int)(count[1] * block[1]);
+        displs[i]     = (int)(i * dataset_dims[1]);
+    }
 
-    memspace = H5Screate_simple(1, flat_dims, NULL);
-    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
+    VRFY((MPI_SUCCESS == MPI_Allgatherv(data, recvcounts[mpi_rank], C_DATATYPE_MPI, correct_buf, recvcounts,
+                                        displs, C_DATATYPE_MPI, comm)),
+         "MPI_Allgatherv succeeded");
 
-    /* Select hyperslab in the file */
-    filespace = H5Dget_space(dset_id);
-    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
+    VRFY((0 == HDmemcmp(read_buf, correct_buf, read_buf_size)), "Data verification succeeded");
 
     /*
-     * Each process defines the dataset selection in the file and
-     * reads it to the selection in memory
+     * Write to whole dataset and ensure fill value isn't returned
+     * after reading whole dataset back
      */
-    count[0]  = 1;
-    count[1]  = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_ENTRIES_PER_PROC;
-    stride[0] = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NROWS;
-    stride[1] = READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NCOLS;
-    block[0]  = (hsize_t)READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NROWS / (hsize_t)mpi_size;
-    block[1]  = READ_COMPOUND_FILTERED_CHUNKS_TYPE_CONVERSION_SHARED_CH_NCOLS;
-    start[0]  = (hsize_t)mpi_rank;
+
+    /* Each process defines the dataset selection in memory and writes
+     * it to the hyperslab in the file
+     */
+    count[0]  = (hsize_t)FILL_VALUES_TEST_NROWS / (hsize_t)FILL_VALUES_TEST_CH_NROWS;
+    count[1]  = (hsize_t)FILL_VALUES_TEST_NCOLS / (hsize_t)FILL_VALUES_TEST_CH_NCOLS;
+    stride[0] = (hsize_t)FILL_VALUES_TEST_CH_NROWS;
+    stride[1] = (hsize_t)FILL_VALUES_TEST_CH_NCOLS;
+    block[0]  = (hsize_t)FILL_VALUES_TEST_CH_NROWS / (hsize_t)mpi_size;
+    block[1]  = (hsize_t)FILL_VALUES_TEST_CH_NCOLS;
+    start[0]  = (hsize_t)mpi_rank * block[0];
     start[1]  = 0;
 
     if (VERBOSE_MED) {
-        HDprintf("Process %d is reading with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
                  ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
                  ", %" PRIuHSIZE " ]\n",
                  mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
@@ -5939,304 +8012,290 @@ test_read_cmpd_filtered_dataset_type_conversion_shared(void)
     VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
          "Hyperslab selection succeeded");
 
-    /* Create property list for collective dataset read */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
-
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
-
-    read_buf_size = flat_dims[0] * sizeof(*read_buf);
-
-    read_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, read_buf_size);
-    VRFY((NULL != read_buf), "HDcalloc succeeded");
-
-    VRFY((H5Dread(dset_id, memtype, memspace, filespace, plist_id, read_buf) >= 0), "Dataset read succeeded");
-
-    global_buf = (COMPOUND_C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != global_buf), "HDcalloc succeeded");
-
-    /* Collect each piece of data from all ranks into a global buffer on all ranks */
-    recvcounts = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
-    VRFY((NULL != recvcounts), "HDcalloc succeeded");
-
-    for (i = 0; i < (size_t)mpi_size; i++)
-        recvcounts[i] = (int)(flat_dims[0] * sizeof(*read_buf));
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0),
+         "Dataset write succeeded");
 
-    displs = (int *)HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
-    VRFY((NULL != displs), "HDcalloc succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
 
-    for (i = 0; i < (size_t)mpi_size; i++)
-        displs[i] = (int)(i * flat_dims[0] * sizeof(*read_buf));
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
-    VRFY((MPI_SUCCESS == MPI_Allgatherv(read_buf, (int)(flat_dims[0] * sizeof(COMPOUND_C_DATATYPE)), MPI_BYTE,
-                                        global_buf, recvcounts, displs, MPI_BYTE, comm)),
-         "MPI_Allgatherv succeeded");
+    /* Verify correct data was written */
+    dset_id = H5Dopen2(group_id, FILL_VALUES_TEST_DATASET_NAME, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    VRFY((0 == HDmemcmp(global_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
+         "Dataset read succeeded");
 
-    if (displs)
-        HDfree(displs);
-    if (recvcounts)
-        HDfree(recvcounts);
-    if (global_buf)
-        HDfree(global_buf);
-    if (read_buf)
-        HDfree(read_buf);
-    if (correct_buf)
-        HDfree(correct_buf);
+    for (i = 0; i < read_buf_size / sizeof(*read_buf); i++)
+        VRFY((read_buf[i] != FILL_VALUES_TEST_FILL_VAL), "Data verification succeeded");
 
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
-    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
-    VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Tclose(memtype) >= 0), "Memory datatype close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
-    VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
-
-    return;
-}
-
-/*
- * Tests write of filtered data to a dataset
- * by a single process. After the write has
- * succeeded, the dataset is closed and then
- * re-opened in parallel and read by all
- * processes to ensure data correctness.
- *
- * Programmer: Jordan Henderson
- *             08/03/2017
- */
-static void
-test_write_serial_read_parallel(void)
-{
-    C_DATATYPE *data        = NULL;
-    C_DATATYPE *read_buf    = NULL;
-    C_DATATYPE *correct_buf = NULL;
-    hsize_t     dataset_dims[WRITE_SERIAL_READ_PARALLEL_DATASET_DIMS];
-    hsize_t     chunk_dims[WRITE_SERIAL_READ_PARALLEL_DATASET_DIMS];
-    size_t      i, data_size, correct_buf_size;
-    hid_t       file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t       filespace = -1;
 
-    if (MAINPROCESS)
-        HDputs("Testing write file serially; read file in parallel");
+    /**************************************************************************
+     * Set the fill time to H5D_FILL_TIME_ALLOC and repeat the previous tests *
+     **************************************************************************/
 
-    CHECK_CUR_FILTER_AVAIL();
-
-    dataset_dims[0] = (hsize_t)WRITE_SERIAL_READ_PARALLEL_NROWS;
-    dataset_dims[1] = (hsize_t)WRITE_SERIAL_READ_PARALLEL_NCOLS;
-    dataset_dims[2] = (hsize_t)WRITE_SERIAL_READ_PARALLEL_DEPTH;
+    VRFY((H5Pset_fill_time(plist_id, H5D_FILL_TIME_ALLOC) >= 0), "H5Pset_fill_time succeeded");
 
-    /* Write the file on the MAINPROCESS rank */
-    if (MAINPROCESS) {
-        /* Set up file access property list */
-        plist_id = H5Pcreate(H5P_FILE_ACCESS);
-        VRFY((plist_id >= 0), "FAPL creation succeeded");
+    dset_id = H5Dcreate2(group_id, FILL_VALUES_TEST_DATASET_NAME2, HDF5_DATATYPE_NAME, filespace, H5P_DEFAULT,
+                         plist_id, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-        VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-             "Set libver bounds succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
 
-        file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
-        VRFY((file_id >= 0), "Test file open succeeded");
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
-        VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    /* Read entire dataset and verify that the fill value is returned */
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
+         "Dataset read succeeded");
 
-        /* Create the dataspace for the dataset */
-        chunk_dims[0] = (hsize_t)WRITE_SERIAL_READ_PARALLEL_CH_NROWS;
-        chunk_dims[1] = (hsize_t)WRITE_SERIAL_READ_PARALLEL_CH_NCOLS;
-        chunk_dims[2] = 1;
+    for (i = 0; i < read_buf_size / sizeof(*read_buf); i++)
+        correct_buf[i] = FILL_VALUES_TEST_FILL_VAL;
 
-        filespace = H5Screate_simple(WRITE_SERIAL_READ_PARALLEL_DATASET_DIMS, dataset_dims, NULL);
-        VRFY((filespace >= 0), "File dataspace creation succeeded");
+    VRFY((0 == HDmemcmp(read_buf, correct_buf, read_buf_size)), "Data verification succeeded");
 
-        /* Create chunked dataset */
-        plist_id = H5Pcreate(H5P_DATASET_CREATE);
-        VRFY((plist_id >= 0), "DCPL creation succeeded");
+    /*
+     * Write to part of the first chunk in the dataset with
+     * all ranks, then read the whole dataset and ensure that
+     * the fill value is returned for the unwritten part of
+     * the chunk, as well as for the rest of the dataset that
+     * hasn't been written to yet.
+     */
+    count[0]  = 1;
+    count[1]  = 1;
+    stride[0] = (hsize_t)FILL_VALUES_TEST_CH_NROWS;
+    stride[1] = (hsize_t)FILL_VALUES_TEST_CH_NCOLS;
+    block[0]  = 1;
+    block[1]  = (hsize_t)(FILL_VALUES_TEST_CH_NCOLS - 1);
+    start[0]  = (hsize_t)mpi_rank;
+    start[1]  = 0;
 
-        VRFY((H5Pset_chunk(plist_id, WRITE_SERIAL_READ_PARALLEL_DATASET_DIMS, chunk_dims) >= 0),
-             "Chunk size set");
+    if (VERBOSE_MED) {
+        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ]\n",
+                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
+        HDfflush(stdout);
+    }
 
-        /* Add test filter to the pipeline */
-        VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+    /* Select hyperslab in the file */
+    filespace = H5Dget_space(dset_id);
+    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
 
-        dset_id = H5Dcreate2(file_id, WRITE_SERIAL_READ_PARALLEL_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
-                             H5P_DEFAULT, plist_id, H5P_DEFAULT);
-        VRFY((dset_id >= 0), "Dataset creation succeeded");
+    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+         "Hyperslab selection succeeded");
 
-        VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
-        VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+    for (i = 0; i < data_size / sizeof(*data); i++)
+        data[i] = (C_DATATYPE)GEN_DATA(i);
 
-        data_size = dataset_dims[0] * dataset_dims[1] * dataset_dims[2] * sizeof(*data);
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0),
+         "Dataset write succeeded");
 
-        data = (C_DATATYPE *)HDcalloc(1, data_size);
-        VRFY((NULL != data), "HDcalloc succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, SOME_CHUNKS_WRITTEN);
 
-        for (i = 0; i < data_size / sizeof(*data); i++)
-            data[i] = (C_DATATYPE)GEN_DATA(i);
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
-        VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, data) >= 0),
-             "Dataset write succeeded");
+    /* Verify correct data was written */
+    dset_id = H5Dopen2(group_id, FILL_VALUES_TEST_DATASET_NAME2, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
 
-        if (data)
-            HDfree(data);
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
+         "Dataset read succeeded");
 
-        VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
-        VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+    for (i = 0; i < (size_t)mpi_size; i++) {
+        recvcounts[i] = (int)(count[1] * block[1]);
+        displs[i]     = (int)(i * dataset_dims[1]);
     }
 
-    correct_buf_size = dataset_dims[0] * dataset_dims[1] * dataset_dims[2] * sizeof(*correct_buf);
+    /*
+     * Each MPI rank communicates their written piece of data
+     * into each other rank's correctness-checking buffer
+     */
+    VRFY((MPI_SUCCESS == MPI_Allgatherv(data, recvcounts[mpi_rank], C_DATATYPE_MPI, correct_buf, recvcounts,
+                                        displs, C_DATATYPE_MPI, comm)),
+         "MPI_Allgatherv succeeded");
 
-    correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != correct_buf), "HDcalloc succeeded");
+    VRFY((0 == HDmemcmp(read_buf, correct_buf, read_buf_size)), "Data verification succeeded");
 
-    read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
-    VRFY((NULL != read_buf), "HDcalloc succeeded");
+    /*
+     * Write to whole dataset and ensure fill value isn't returned
+     * after reading whole dataset back
+     */
 
-    for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
-        correct_buf[i] = (long)i;
+    /* Each process defines the dataset selection in memory and writes
+     * it to the hyperslab in the file
+     */
+    count[0]  = (hsize_t)FILL_VALUES_TEST_NROWS / (hsize_t)FILL_VALUES_TEST_CH_NROWS;
+    count[1]  = (hsize_t)FILL_VALUES_TEST_NCOLS / (hsize_t)FILL_VALUES_TEST_CH_NCOLS;
+    stride[0] = (hsize_t)FILL_VALUES_TEST_CH_NROWS;
+    stride[1] = (hsize_t)FILL_VALUES_TEST_CH_NCOLS;
+    block[0]  = (hsize_t)FILL_VALUES_TEST_CH_NROWS / (hsize_t)mpi_size;
+    block[1]  = (hsize_t)FILL_VALUES_TEST_CH_NCOLS;
+    start[0]  = (hsize_t)mpi_rank * block[0];
+    start[1]  = 0;
 
-    /* All ranks open the file and verify their "portion" of the dataset is correct */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
+    if (VERBOSE_MED) {
+        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ]\n",
+                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
+        HDfflush(stdout);
+    }
 
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
+    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+         "Hyperslab selection succeeded");
 
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0),
+         "Dataset write succeeded");
 
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
-    VRFY((file_id >= 0), "Test file open succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
-    dset_id = H5Dopen2(file_id, "/" WRITE_SERIAL_READ_PARALLEL_DATASET_NAME, H5P_DEFAULT);
+    /* Verify correct data was written */
+    dset_id = H5Dopen2(group_id, FILL_VALUES_TEST_DATASET_NAME2, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset open succeeded");
 
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
-
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
-
-    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, plist_id, read_buf) >= 0),
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
          "Dataset read succeeded");
 
-    VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+    for (i = 0; i < read_buf_size / sizeof(*read_buf); i++)
+        VRFY((read_buf[i] != FILL_VALUES_TEST_FILL_VAL), "Data verification succeeded");
 
-    if (correct_buf)
-        HDfree(correct_buf);
+    if (displs)
+        HDfree(displs);
+    if (recvcounts)
+        HDfree(recvcounts);
+    if (data)
+        HDfree(data);
     if (read_buf)
         HDfree(read_buf);
+    if (correct_buf)
+        HDfree(correct_buf);
 
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
 
-#if MPI_VERSION >= 3
 /*
- * Tests parallel write of filtered data
- * to a dataset. After the write has
- * succeeded, the dataset is closed and
- * then re-opened and read by a single
- * process to ensure data correctness.
- *
- * Programmer: Jordan Henderson
- *             08/03/2017
+ * Tests that the parallel compression feature can handle
+ * an undefined fill value. Nothing is verified in this
+ * test since the fill value isn't defined.
  */
 static void
-test_write_parallel_read_serial(void)
+test_fill_value_undefined(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, hid_t dcpl_id,
+                          hid_t dxpl_id)
 {
-    C_DATATYPE *data        = NULL;
-    C_DATATYPE *read_buf    = NULL;
-    C_DATATYPE *correct_buf = NULL;
-    hsize_t     dataset_dims[WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS];
-    hsize_t     chunk_dims[WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS];
-    hsize_t     sel_dims[WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS];
-    hsize_t     count[WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS];
-    hsize_t     stride[WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS];
-    hsize_t     block[WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS];
-    hsize_t     offset[WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS];
-    size_t      i, data_size, correct_buf_size;
-    hid_t       file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t       filespace = -1, memspace = -1;
+    H5D_alloc_time_t alloc_time;
+    C_DATATYPE *     data     = NULL;
+    C_DATATYPE *     read_buf = NULL;
+    hsize_t          dataset_dims[FILL_VALUE_UNDEFINED_TEST_DATASET_DIMS];
+    hsize_t          chunk_dims[FILL_VALUE_UNDEFINED_TEST_DATASET_DIMS];
+    hsize_t          sel_dims[FILL_VALUE_UNDEFINED_TEST_DATASET_DIMS];
+    hsize_t          start[FILL_VALUE_UNDEFINED_TEST_DATASET_DIMS];
+    hsize_t          stride[FILL_VALUE_UNDEFINED_TEST_DATASET_DIMS];
+    hsize_t          count[FILL_VALUE_UNDEFINED_TEST_DATASET_DIMS];
+    hsize_t          block[FILL_VALUE_UNDEFINED_TEST_DATASET_DIMS];
+    size_t           i, data_size, read_buf_size;
+    hid_t            file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t            group_id  = H5I_INVALID_HID;
+    hid_t            filespace = H5I_INVALID_HID;
 
     if (MAINPROCESS)
-        HDputs("Testing write file in parallel; read serially");
+        HDputs("Testing undefined fill value");
 
-    CHECK_CUR_FILTER_AVAIL();
+    VRFY((H5Pget_alloc_time(dcpl_id, &alloc_time) >= 0), "H5Pget_alloc_time succeeded");
 
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
-
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
     /* Create the dataspace for the dataset */
-    dataset_dims[0] = (hsize_t)WRITE_PARALLEL_READ_SERIAL_NROWS;
-    dataset_dims[1] = (hsize_t)WRITE_PARALLEL_READ_SERIAL_NCOLS;
-    dataset_dims[2] = (hsize_t)WRITE_PARALLEL_READ_SERIAL_DEPTH;
-    chunk_dims[0]   = (hsize_t)WRITE_PARALLEL_READ_SERIAL_CH_NROWS;
-    chunk_dims[1]   = (hsize_t)WRITE_PARALLEL_READ_SERIAL_CH_NCOLS;
-    chunk_dims[2]   = 1;
-    sel_dims[0]     = (hsize_t)WRITE_PARALLEL_READ_SERIAL_CH_NROWS;
-    sel_dims[1]     = (hsize_t)WRITE_PARALLEL_READ_SERIAL_NCOLS;
-    sel_dims[2]     = (hsize_t)WRITE_PARALLEL_READ_SERIAL_DEPTH;
+    dataset_dims[0] = (hsize_t)FILL_VALUE_UNDEFINED_TEST_NROWS;
+    dataset_dims[1] = (hsize_t)FILL_VALUE_UNDEFINED_TEST_NCOLS;
+    chunk_dims[0]   = (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NROWS;
+    chunk_dims[1]   = (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NCOLS;
+    sel_dims[0]     = (hsize_t)DIM0_SCALE_FACTOR;
+    sel_dims[1]     = (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NCOLS * (hsize_t)DIM1_SCALE_FACTOR;
 
-    filespace = H5Screate_simple(WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS, dataset_dims, NULL);
+    filespace = H5Screate_simple(FILL_VALUE_UNDEFINED_TEST_DATASET_DIMS, dataset_dims, NULL);
     VRFY((filespace >= 0), "File dataspace creation succeeded");
 
-    memspace = H5Screate_simple(WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS, sel_dims, NULL);
-    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
-
     /* Create chunked dataset */
-    plist_id = H5Pcreate(H5P_DATASET_CREATE);
-    VRFY((plist_id >= 0), "DCPL creation succeeded");
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-    VRFY((H5Pset_chunk(plist_id, WRITE_PARALLEL_READ_SERIAL_DATASET_DIMS, chunk_dims) >= 0),
-         "Chunk size set");
+    VRFY((H5Pset_chunk(plist_id, FILL_VALUE_UNDEFINED_TEST_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set");
 
     /* Add test filter to the pipeline */
-    VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
 
-    dset_id = H5Dcreate2(file_id, WRITE_PARALLEL_READ_SERIAL_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
+    /* Set an undefined fill value */
+    VRFY((H5Pset_fill_value(plist_id, HDF5_DATATYPE_NAME, NULL) >= 0), "Fill Value set");
+
+    dset_id = H5Dcreate2(group_id, FILL_VALUE_UNDEFINED_TEST_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
                          H5P_DEFAULT, plist_id, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
-    /* Each process defines the dataset selection in memory and writes
-     * it to the hyperslab in the file
+    /* Allocate buffer for reading entire dataset */
+    read_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*read_buf);
+
+    read_buf = HDcalloc(1, read_buf_size);
+    VRFY((NULL != read_buf), "HDcalloc succeeded");
+
+    /*
+     * Read entire dataset - nothing to verify since there's no fill value.
+     * If not using early space allocation, the read should fail since storage
+     * isn't allocated yet and no fill value is defined.
      */
-    count[0]  = 1;
-    count[1]  = (hsize_t)WRITE_PARALLEL_READ_SERIAL_NCOLS / (hsize_t)WRITE_PARALLEL_READ_SERIAL_CH_NCOLS;
-    count[2]  = (hsize_t)mpi_size;
-    stride[0] = (hsize_t)WRITE_PARALLEL_READ_SERIAL_CH_NROWS;
-    stride[1] = (hsize_t)WRITE_PARALLEL_READ_SERIAL_CH_NCOLS;
-    stride[2] = 1;
-    block[0]  = (hsize_t)WRITE_PARALLEL_READ_SERIAL_CH_NROWS;
-    block[1]  = (hsize_t)WRITE_PARALLEL_READ_SERIAL_CH_NCOLS;
-    block[2]  = 1;
-    offset[0] = ((hsize_t)mpi_rank * (hsize_t)WRITE_PARALLEL_READ_SERIAL_CH_NROWS * count[0]);
-    offset[1] = 0;
-    offset[2] = 0;
+    if (alloc_time == H5D_ALLOC_TIME_EARLY) {
+        VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
+             "Dataset read succeeded");
+    }
+    else {
+        H5E_BEGIN_TRY
+        {
+            VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) < 0),
+                 "Dataset read succeeded");
+        }
+        H5E_END_TRY;
+    }
+
+    /*
+     * Write to part of the first chunk in the dataset with
+     * all ranks, then read the whole dataset. Don't verify
+     * anything since there's no fill value defined.
+     */
+    count[0]  = 1;
+    count[1]  = 1;
+    stride[0] = (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NROWS;
+    stride[1] = (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NCOLS;
+    block[0]  = 1;
+    block[1]  = (hsize_t)(FILL_VALUE_UNDEFINED_TEST_CH_NCOLS - 1);
+    start[0]  = (hsize_t)mpi_rank;
+    start[1]  = 0;
 
     if (VERBOSE_MED) {
-        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE
-                 " ], stride[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE " ], offset[ %" PRIuHSIZE
-                 ", %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE ", %" PRIuHSIZE ", %" PRIuHSIZE
-                 " ]\n",
-                 mpi_rank, count[0], count[1], count[2], stride[0], stride[1], stride[2], offset[0],
-                 offset[1], offset[2], block[0], block[1], block[2]);
+        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ]\n",
+                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
         HDfflush(stdout);
     }
 
@@ -6244,11 +8303,11 @@ test_write_parallel_read_serial(void)
     filespace = H5Dget_space(dset_id);
     VRFY((filespace >= 0), "File dataspace retrieval succeeded");
 
-    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, offset, stride, count, block) >= 0),
+    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
          "Hyperslab selection succeeded");
 
     /* Fill data buffer */
-    data_size = sel_dims[0] * sel_dims[1] * sel_dims[2] * sizeof(*data);
+    data_size = sel_dims[0] * sel_dims[1] * sizeof(*data);
 
     data = (C_DATATYPE *)HDcalloc(1, data_size);
     VRFY((NULL != data), "HDcalloc succeeded");
@@ -6256,150 +8315,204 @@ test_write_parallel_read_serial(void)
     for (i = 0; i < data_size / sizeof(*data); i++)
         data[i] = (C_DATATYPE)GEN_DATA(i);
 
-    /* Create property list for collective dataset write */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
-
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
-
-    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, memspace, filespace, plist_id, data) >= 0),
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0),
          "Dataset write succeeded");
 
-    if (data)
-        HDfree(data);
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, SOME_CHUNKS_WRITTEN);
 
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
-    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
-    VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
-    VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
-
-    if (MAINPROCESS) {
-        plist_id = H5Pcreate(H5P_FILE_ACCESS);
-        VRFY((plist_id >= 0), "FAPL creation succeeded");
 
-        VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-             "Set libver bounds succeeded");
+    dset_id = H5Dopen2(group_id, FILL_VALUE_UNDEFINED_TEST_DATASET_NAME, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
 
-        file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
-        VRFY((file_id >= 0), "Test file open succeeded");
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
+         "Dataset read succeeded");
 
-        VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    /*
+     * Write to whole dataset and ensure data is correct
+     * after reading whole dataset back
+     */
 
-        dset_id = H5Dopen2(file_id, "/" WRITE_PARALLEL_READ_SERIAL_DATASET_NAME, H5P_DEFAULT);
-        VRFY((dset_id >= 0), "Dataset open succeeded");
+    /* Each process defines the dataset selection in memory and writes
+     * it to the hyperslab in the file
+     */
+    count[0]  = (hsize_t)FILL_VALUE_UNDEFINED_TEST_NROWS / (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NROWS;
+    count[1]  = (hsize_t)FILL_VALUE_UNDEFINED_TEST_NCOLS / (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NCOLS;
+    stride[0] = (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NROWS;
+    stride[1] = (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NCOLS;
+    block[0]  = (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NROWS / (hsize_t)mpi_size;
+    block[1]  = (hsize_t)FILL_VALUE_UNDEFINED_TEST_CH_NCOLS;
+    start[0]  = (hsize_t)mpi_rank * block[0];
+    start[1]  = 0;
 
-        correct_buf_size = dataset_dims[0] * dataset_dims[1] * dataset_dims[2] * sizeof(*correct_buf);
+    if (VERBOSE_MED) {
+        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ]\n",
+                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
+        HDfflush(stdout);
+    }
 
-        correct_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
-        VRFY((NULL != correct_buf), "HDcalloc succeeded");
+    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+         "Hyperslab selection succeeded");
 
-        read_buf = (C_DATATYPE *)HDcalloc(1, correct_buf_size);
-        VRFY((NULL != read_buf), "HDcalloc succeeded");
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0),
+         "Dataset write succeeded");
 
-        for (i = 0; i < correct_buf_size / sizeof(*correct_buf); i++)
-            correct_buf[i] = (C_DATATYPE)((i % (dataset_dims[0] * dataset_dims[1])) +
-                                          (i / (dataset_dims[0] * dataset_dims[1])));
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
 
-        VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, H5P_DEFAULT, read_buf) >= 0),
-             "Dataset read succeeded");
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
 
-        VRFY((0 == HDmemcmp(read_buf, correct_buf, correct_buf_size)), "Data verification succeeded");
+    /* Verify correct data was written */
+    dset_id = H5Dopen2(group_id, FILL_VALUE_UNDEFINED_TEST_DATASET_NAME, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
 
-        VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
-        VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
+         "Dataset read succeeded");
 
-        HDfree(correct_buf);
+    if (data)
+        HDfree(data);
+    if (read_buf)
         HDfree(read_buf);
-    }
+
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+    VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
+    VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
 }
 
 /*
- * Tests that causing chunks to continually grow and shrink
- * by writing random data followed by zeroed-out data (and
- * thus controlling the compression ratio) does not cause
- * problems.
- *
- * Programmer: Jordan Henderson
- *             06/04/2018
+ * Tests that the parallel compression feature correctly handles
+ * avoiding writing fill values to a dataset when the fill time
+ * is set as H5D_FILL_TIME_NEVER.
  */
 static void
-test_shrinking_growing_chunks(void)
+test_fill_time_never(const char *parent_group, H5Z_filter_t filter_id, hid_t fapl_id, hid_t dcpl_id,
+                     hid_t dxpl_id)
 {
-    double *data = NULL;
-    hsize_t dataset_dims[SHRINKING_GROWING_CHUNKS_DATASET_DIMS];
-    hsize_t chunk_dims[SHRINKING_GROWING_CHUNKS_DATASET_DIMS];
-    hsize_t sel_dims[SHRINKING_GROWING_CHUNKS_DATASET_DIMS];
-    hsize_t start[SHRINKING_GROWING_CHUNKS_DATASET_DIMS];
-    hsize_t stride[SHRINKING_GROWING_CHUNKS_DATASET_DIMS];
-    hsize_t count[SHRINKING_GROWING_CHUNKS_DATASET_DIMS];
-    hsize_t block[SHRINKING_GROWING_CHUNKS_DATASET_DIMS];
-    size_t  i, data_size;
-    hid_t   file_id = -1, dset_id = -1, plist_id = -1;
-    hid_t   filespace = -1, memspace = -1;
+    C_DATATYPE *data     = NULL;
+    C_DATATYPE *read_buf = NULL;
+    C_DATATYPE *fill_buf = NULL;
+    C_DATATYPE  fill_value;
+    hsize_t     dataset_dims[FILL_TIME_NEVER_TEST_DATASET_DIMS];
+    hsize_t     chunk_dims[FILL_TIME_NEVER_TEST_DATASET_DIMS];
+    hsize_t     sel_dims[FILL_TIME_NEVER_TEST_DATASET_DIMS];
+    hsize_t     start[FILL_TIME_NEVER_TEST_DATASET_DIMS];
+    hsize_t     stride[FILL_TIME_NEVER_TEST_DATASET_DIMS];
+    hsize_t     count[FILL_TIME_NEVER_TEST_DATASET_DIMS];
+    hsize_t     block[FILL_TIME_NEVER_TEST_DATASET_DIMS];
+    size_t      i, data_size, read_buf_size;
+    hid_t       file_id = H5I_INVALID_HID, dset_id = H5I_INVALID_HID, plist_id = H5I_INVALID_HID;
+    hid_t       group_id   = H5I_INVALID_HID;
+    hid_t       filespace  = H5I_INVALID_HID;
+    int *       recvcounts = NULL;
+    int *       displs     = NULL;
 
     if (MAINPROCESS)
-        HDputs("Testing continually shrinking/growing chunks");
-
-    CHECK_CUR_FILTER_AVAIL();
+        HDputs("Testing fill time H5D_FILL_TIME_NEVER");
 
-    /* Set up file access property list with parallel I/O access */
-    plist_id = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((plist_id >= 0), "FAPL creation succeeded");
+    /*
+     * Only run this test when incremental file space allocation is
+     * used, as HDF5's chunk allocation code always writes fill values
+     * when filters are in the pipeline, but parallel compression does
+     * incremental file space allocation differently.
+     */
+    {
+        H5D_alloc_time_t alloc_time;
 
-    VRFY((H5Pset_fapl_mpio(plist_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
+        VRFY((H5Pget_alloc_time(dcpl_id, &alloc_time) >= 0), "H5Pget_alloc_time succeeded");
 
-    VRFY((H5Pset_libver_bounds(plist_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
+        if (alloc_time != H5D_ALLOC_TIME_INCR) {
+            if (MAINPROCESS)
+                SKIPPED();
+            return;
+        }
+    }
 
-    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, plist_id);
+    file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
     VRFY((file_id >= 0), "Test file open succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "FAPL close succeeded");
+    group_id = H5Gopen2(file_id, parent_group, H5P_DEFAULT);
+    VRFY((group_id >= 0), "H5Gopen2 succeeded");
 
     /* Create the dataspace for the dataset */
-    dataset_dims[0] = (hsize_t)SHRINKING_GROWING_CHUNKS_NROWS;
-    dataset_dims[1] = (hsize_t)SHRINKING_GROWING_CHUNKS_NCOLS;
-    chunk_dims[0]   = (hsize_t)SHRINKING_GROWING_CHUNKS_CH_NROWS;
-    chunk_dims[1]   = (hsize_t)SHRINKING_GROWING_CHUNKS_CH_NCOLS;
-    sel_dims[0]     = (hsize_t)SHRINKING_GROWING_CHUNKS_CH_NROWS;
-    sel_dims[1]     = (hsize_t)SHRINKING_GROWING_CHUNKS_NCOLS;
+    dataset_dims[0] = (hsize_t)FILL_TIME_NEVER_TEST_NROWS;
+    dataset_dims[1] = (hsize_t)FILL_TIME_NEVER_TEST_NCOLS;
+    chunk_dims[0]   = (hsize_t)FILL_TIME_NEVER_TEST_CH_NROWS;
+    chunk_dims[1]   = (hsize_t)FILL_TIME_NEVER_TEST_CH_NCOLS;
+    sel_dims[0]     = (hsize_t)DIM0_SCALE_FACTOR;
+    sel_dims[1]     = (hsize_t)FILL_TIME_NEVER_TEST_CH_NCOLS * (hsize_t)DIM1_SCALE_FACTOR;
 
-    filespace = H5Screate_simple(SHRINKING_GROWING_CHUNKS_DATASET_DIMS, dataset_dims, NULL);
+    filespace = H5Screate_simple(FILL_TIME_NEVER_TEST_DATASET_DIMS, dataset_dims, NULL);
     VRFY((filespace >= 0), "File dataspace creation succeeded");
 
-    memspace = H5Screate_simple(SHRINKING_GROWING_CHUNKS_DATASET_DIMS, sel_dims, NULL);
-    VRFY((memspace >= 0), "Memory dataspace creation succeeded");
-
     /* Create chunked dataset */
-    plist_id = H5Pcreate(H5P_DATASET_CREATE);
-    VRFY((plist_id >= 0), "DCPL creation succeeded");
+    plist_id = H5Pcopy(dcpl_id);
+    VRFY((plist_id >= 0), "DCPL copy succeeded");
 
-    VRFY((H5Pset_chunk(plist_id, SHRINKING_GROWING_CHUNKS_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set");
+    VRFY((H5Pset_chunk(plist_id, FILL_TIME_NEVER_TEST_DATASET_DIMS, chunk_dims) >= 0), "Chunk size set");
 
     /* Add test filter to the pipeline */
-    VRFY((set_dcpl_filter(plist_id) >= 0), "Filter set");
+    VRFY((set_dcpl_filter(plist_id, filter_id, NULL) >= 0), "Filter set");
+
+    /* Set the fill value that this test's read-back checks compare against */
+    fill_value = FILL_TIME_NEVER_TEST_FILL_VAL;
+    VRFY((H5Pset_fill_value(plist_id, HDF5_DATATYPE_NAME, &fill_value) >= 0), "Fill Value set");
 
-    dset_id = H5Dcreate2(file_id, SHRINKING_GROWING_CHUNKS_DATASET_NAME, H5T_NATIVE_DOUBLE, filespace,
+    /* Set fill time of 'never' */
+    VRFY((H5Pset_fill_time(plist_id, H5D_FILL_TIME_NEVER) >= 0), "H5Pset_fill_time succeeded");
+
+    dset_id = H5Dcreate2(group_id, FILL_TIME_NEVER_TEST_DATASET_NAME, HDF5_DATATYPE_NAME, filespace,
                          H5P_DEFAULT, plist_id, H5P_DEFAULT);
     VRFY((dset_id >= 0), "Dataset creation succeeded");
 
-    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, DATASET_JUST_CREATED);
+
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
 
+    /* Allocate buffer for reading entire dataset */
+    read_buf_size = dataset_dims[0] * dataset_dims[1] * sizeof(*read_buf);
+
+    read_buf = HDcalloc(1, read_buf_size);
+    VRFY((NULL != read_buf), "HDcalloc succeeded");
+
+    fill_buf = HDcalloc(1, read_buf_size);
+    VRFY((NULL != fill_buf), "HDcalloc succeeded");
+
+    /* Read entire dataset and verify that the fill value isn't returned */
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
+         "Dataset read succeeded");
+
+    for (i = 0; i < read_buf_size / sizeof(*read_buf); i++)
+        fill_buf[i] = FILL_TIME_NEVER_TEST_FILL_VAL;
+
     /*
-     * Each process defines the dataset selection in memory and writes
-     * it to the hyperslab in the file
+     * It should be very unlikely for the dataset's random
+     * values to all be the fill value, so this should be
+     * a safe comparison in theory.
+     */
+    VRFY((0 != HDmemcmp(read_buf, fill_buf, read_buf_size)), "Data verification succeeded");
+
+    /*
+     * Write to part of the first chunk in the dataset with
+     * all ranks, then read the whole dataset and ensure that
+     * the fill value isn't returned for the unwritten part of
+     * the chunk, as well as for the rest of the dataset that
+     * hasn't been written to yet.
      */
     count[0]  = 1;
-    count[1]  = (hsize_t)SHRINKING_GROWING_CHUNKS_NCOLS / (hsize_t)SHRINKING_GROWING_CHUNKS_CH_NCOLS;
-    stride[0] = (hsize_t)SHRINKING_GROWING_CHUNKS_CH_NROWS;
-    stride[1] = (hsize_t)SHRINKING_GROWING_CHUNKS_CH_NCOLS;
-    block[0]  = (hsize_t)SHRINKING_GROWING_CHUNKS_CH_NROWS;
-    block[1]  = (hsize_t)SHRINKING_GROWING_CHUNKS_CH_NCOLS;
-    start[0]  = ((hsize_t)mpi_rank * (hsize_t)SHRINKING_GROWING_CHUNKS_CH_NROWS * count[0]);
+    count[1]  = 1;
+    stride[0] = (hsize_t)FILL_TIME_NEVER_TEST_CH_NROWS;
+    stride[1] = (hsize_t)FILL_TIME_NEVER_TEST_CH_NCOLS;
+    block[0]  = 1;
+    block[1]  = (hsize_t)(FILL_TIME_NEVER_TEST_CH_NCOLS - 1);
+    start[0]  = (hsize_t)mpi_rank;
     start[1]  = 0;
 
     if (VERBOSE_MED) {
@@ -6412,44 +8525,122 @@ test_shrinking_growing_chunks(void)
 
     /* Select hyperslab in the file */
     filespace = H5Dget_space(dset_id);
-    VRFY((dset_id >= 0), "File dataspace retrieval succeeded");
+    VRFY((filespace >= 0), "File dataspace retrieval succeeded");
 
     VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
          "Hyperslab selection succeeded");
 
-    /* Create property list for collective dataset write */
-    plist_id = H5Pcreate(H5P_DATASET_XFER);
-    VRFY((plist_id >= 0), "DXPL creation succeeded");
+    /* Fill data buffer */
+    data_size = sel_dims[0] * sel_dims[1] * sizeof(*data);
 
-    VRFY((H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE) >= 0), "Set DXPL MPIO succeeded");
+    data = (C_DATATYPE *)HDcalloc(1, data_size);
+    VRFY((NULL != data), "HDcalloc succeeded");
 
-    data_size = sel_dims[0] * sel_dims[1] * sizeof(double);
+    for (i = 0; i < data_size / sizeof(*data); i++)
+        data[i] = (C_DATATYPE)GEN_DATA(i);
 
-    data = (double *)HDcalloc(1, data_size);
-    VRFY((NULL != data), "HDcalloc succeeded");
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0),
+         "Dataset write succeeded");
 
-    for (i = 0; i < SHRINKING_GROWING_CHUNKS_NLOOPS; i++) {
-        /* Continually write random float data, followed by zeroed-out data */
-        if ((i % 2))
-            HDmemset(data, 0, data_size);
-        else {
-            size_t j;
-            for (j = 0; j < data_size / sizeof(*data); j++) {
-                data[j] = (float)(rand() / (double)(RAND_MAX / (double)1.0L));
-            }
-        }
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, SOME_CHUNKS_WRITTEN);
 
-        VRFY((H5Dwrite(dset_id, H5T_NATIVE_DOUBLE, memspace, filespace, plist_id, data) >= 0),
-             "Dataset write succeeded");
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+
+    /* Verify correct data was written */
+    dset_id = H5Dopen2(group_id, FILL_TIME_NEVER_TEST_DATASET_NAME, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
+
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
+         "Dataset read succeeded");
+
+    /*
+     * Each MPI rank communicates their written piece of data
+     * into each other rank's correctness-checking buffer
+     */
+    recvcounts = HDcalloc(1, (size_t)mpi_size * sizeof(*recvcounts));
+    VRFY((NULL != recvcounts), "HDcalloc succeeded");
+
+    displs = HDcalloc(1, (size_t)mpi_size * sizeof(*displs));
+    VRFY((NULL != displs), "HDcalloc succeeded");
+
+    for (i = 0; i < (size_t)mpi_size; i++) {
+        recvcounts[i] = (int)(count[1] * block[1]);
+        displs[i]     = (int)(i * dataset_dims[1]);
+    }
+
+    VRFY((MPI_SUCCESS == MPI_Allgatherv(data, recvcounts[mpi_rank], C_DATATYPE_MPI, fill_buf, recvcounts,
+                                        displs, C_DATATYPE_MPI, comm)),
+         "MPI_Allgatherv succeeded");
+
+    /*
+     * It should be very unlikely for the dataset's random
+     * values to all be the fill value, so this should be
+     * a safe comparison in theory.
+     */
+    VRFY((0 != HDmemcmp(read_buf, fill_buf, read_buf_size)), "Data verification succeeded");
+
+    /*
+     * Write to whole dataset and ensure fill value isn't returned
+     * after reading whole dataset back
+     */
+
+    /* Each process defines the dataset selection in memory and writes
+     * it to the hyperslab in the file
+     */
+    count[0]  = (hsize_t)FILL_TIME_NEVER_TEST_NROWS / (hsize_t)FILL_TIME_NEVER_TEST_CH_NROWS;
+    count[1]  = (hsize_t)FILL_TIME_NEVER_TEST_NCOLS / (hsize_t)FILL_TIME_NEVER_TEST_CH_NCOLS;
+    stride[0] = (hsize_t)FILL_TIME_NEVER_TEST_CH_NROWS;
+    stride[1] = (hsize_t)FILL_TIME_NEVER_TEST_CH_NCOLS;
+    block[0]  = (hsize_t)FILL_TIME_NEVER_TEST_CH_NROWS / (hsize_t)mpi_size;
+    block[1]  = (hsize_t)FILL_TIME_NEVER_TEST_CH_NCOLS;
+    start[0]  = (hsize_t)mpi_rank * block[0];
+    start[1]  = 0;
+
+    if (VERBOSE_MED) {
+        HDprintf("Process %d is writing with count[ %" PRIuHSIZE ", %" PRIuHSIZE " ], stride[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ], start[ %" PRIuHSIZE ", %" PRIuHSIZE " ], block size[ %" PRIuHSIZE
+                 ", %" PRIuHSIZE " ]\n",
+                 mpi_rank, count[0], count[1], stride[0], stride[1], start[0], start[1], block[0], block[1]);
+        HDfflush(stdout);
     }
 
+    VRFY((H5Sselect_hyperslab(filespace, H5S_SELECT_SET, start, stride, count, block) >= 0),
+         "Hyperslab selection succeeded");
+
+    VRFY((H5Dwrite(dset_id, HDF5_DATATYPE_NAME, H5S_BLOCK, filespace, dxpl_id, data) >= 0),
+         "Dataset write succeeded");
+
+    /* Verify space allocation status */
+    verify_space_alloc_status(dset_id, plist_id, ALL_CHUNKS_WRITTEN);
+
+    VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
+
+    /* Verify correct data was written */
+    dset_id = H5Dopen2(group_id, FILL_TIME_NEVER_TEST_DATASET_NAME, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "Dataset open succeeded");
+
+    VRFY((H5Dread(dset_id, HDF5_DATATYPE_NAME, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0),
+         "Dataset read succeeded");
+
+    for (i = 0; i < read_buf_size / sizeof(*read_buf); i++)
+        VRFY((read_buf[i] != FILL_TIME_NEVER_TEST_FILL_VAL), "Data verification succeeded");
+
+    if (displs)
+        HDfree(displs);
+    if (recvcounts)
+        HDfree(recvcounts);
     if (data)
         HDfree(data);
+    if (read_buf)
+        HDfree(read_buf);
+    if (fill_buf)
+        HDfree(fill_buf);
 
+    VRFY((H5Pclose(plist_id) >= 0), "DCPL close succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "Dataset close succeeded");
     VRFY((H5Sclose(filespace) >= 0), "File dataspace close succeeded");
-    VRFY((H5Sclose(memspace) >= 0), "Memory dataspace close succeeded");
-    VRFY((H5Pclose(plist_id) >= 0), "DXPL close succeeded");
+    VRFY((H5Gclose(group_id) >= 0), "Group close succeeded");
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
 
     return;
@@ -6459,8 +8650,14 @@ test_shrinking_growing_chunks(void)
 int
 main(int argc, char **argv)
 {
-    size_t i;
-    hid_t  file_id = -1, fapl = -1;
+    size_t cur_filter_idx = 0;
+    size_t num_filters    = 0;
+    hid_t  file_id        = H5I_INVALID_HID;
+    hid_t  fcpl_id        = H5I_INVALID_HID;
+    hid_t  group_id       = H5I_INVALID_HID;
+    hid_t  fapl_id        = H5I_INVALID_HID;
+    hid_t  dxpl_id        = H5I_INVALID_HID;
+    hid_t  dcpl_id        = H5I_INVALID_HID;
     int    mpi_code;
 
     /* Initialize MPI */
@@ -6487,7 +8684,7 @@ main(int argc, char **argv)
 
     if (MAINPROCESS) {
         HDprintf("==========================\n");
-        HDprintf("Parallel Filters tests\n");
+        HDprintf("  Parallel Filters tests\n");
         HDprintf("==========================\n\n");
     }
 
@@ -6496,72 +8693,161 @@ main(int argc, char **argv)
 
     TestAlarmOn();
 
-    /* Create test file */
-    fapl = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((fapl >= 0), "FAPL creation succeeded");
-
-    VRFY((H5Pset_fapl_mpio(fapl, comm, info) >= 0), "Set FAPL MPIO succeeded");
-
-    VRFY((H5Pset_libver_bounds(fapl, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
-
-    VRFY((h5_fixname(FILENAME[0], fapl, filenames[0], sizeof(filenames[0])) != NULL),
-         "Test file name created");
+    num_filters = ARRAY_SIZE(filterIDs);
 
-    file_id = H5Fcreate(filenames[0], H5F_ACC_TRUNC, H5P_DEFAULT, fapl);
-    VRFY((file_id >= 0), "Test file creation succeeded");
+    /* Set up file access property list with parallel I/O access,
+     * collective metadata reads/writes and the latest library
+     * version bounds */
+    fapl_id = H5Pcreate(H5P_FILE_ACCESS);
+    VRFY((fapl_id >= 0), "FAPL creation succeeded");
 
-    VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+    VRFY((H5Pset_fapl_mpio(fapl_id, comm, info) >= 0), "Set FAPL MPIO succeeded");
+    VRFY((H5Pset_all_coll_metadata_ops(fapl_id, TRUE) >= 0), "H5Pset_all_coll_metadata_ops succeeded");
+    VRFY((H5Pset_coll_metadata_write(fapl_id, TRUE) >= 0), "H5Pset_coll_metadata_write succeeded");
 
-    for (i = 0; i < ARRAY_SIZE(tests); i++) {
-        if (MPI_SUCCESS == (mpi_code = MPI_Barrier(comm))) {
-            (*tests[i])();
-        }
-        else {
-            if (MAINPROCESS)
-                MESG("MPI_Barrier failed");
-            nerrors++;
-        }
-    }
+    VRFY((H5Pset_libver_bounds(fapl_id, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
+         "Set libver bounds succeeded");
 
     /*
-     * Increment the filter index to switch to the checksum filter
-     * and re-run the tests.
+     * Set up Paged and Persistent Free Space Management
      */
-    cur_filter_idx++;
-
-    h5_clean_files(FILENAME, fapl);
-
-    fapl = H5Pcreate(H5P_FILE_ACCESS);
-    VRFY((fapl >= 0), "FAPL creation succeeded");
+    fcpl_id = H5Pcreate(H5P_FILE_CREATE);
+    VRFY((fcpl_id >= 0), "FCPL creation succeeded");
 
-    VRFY((H5Pset_fapl_mpio(fapl, comm, info) >= 0), "Set FAPL MPIO succeeded");
+    VRFY((H5Pset_file_space_strategy(fcpl_id, H5F_FSPACE_STRATEGY_PAGE, TRUE, 1) >= 0),
+         "H5Pset_file_space_strategy succeeded");
 
-    VRFY((H5Pset_libver_bounds(fapl, H5F_LIBVER_LATEST, H5F_LIBVER_LATEST) >= 0),
-         "Set libver bounds succeeded");
+    VRFY((h5_fixname(FILENAME[0], fapl_id, filenames[0], sizeof(filenames[0])) != NULL),
+         "Test file name created");
 
-    file_id = H5Fcreate(filenames[0], H5F_ACC_TRUNC, H5P_DEFAULT, fapl);
+    file_id = H5Fcreate(filenames[0], H5F_ACC_TRUNC, fcpl_id, fapl_id);
     VRFY((file_id >= 0), "Test file creation succeeded");
 
     VRFY((H5Fclose(file_id) >= 0), "File close succeeded");
+    file_id = H5I_INVALID_HID;
 
-    if (MAINPROCESS) {
-        HDprintf("\n=================================================================\n");
-        HDprintf("Re-running Parallel Filters tests with Fletcher32 checksum filter\n");
-        HDprintf("=================================================================\n\n");
-    }
-
-    for (i = 0; i < ARRAY_SIZE(tests); i++) {
-        if (MPI_SUCCESS == (mpi_code = MPI_Barrier(comm))) {
-            (*tests[i])();
-        }
-        else {
-            if (MAINPROCESS)
-                MESG("MPI_Barrier failed");
-            nerrors++;
+    /* Create property list for collective dataset write */
+    dxpl_id = H5Pcreate(H5P_DATASET_XFER);
+    VRFY((dxpl_id >= 0), "DXPL creation succeeded");
+
+    VRFY((H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE) >= 0), "H5Pset_dxpl_mpio succeeded");
+
+    /* Create DCPL for dataset creation */
+    dcpl_id = H5Pcreate(H5P_DATASET_CREATE);
+    VRFY((dcpl_id >= 0), "DCPL creation succeeded");
+
+    /* Run tests with all available filters */
+    for (cur_filter_idx = 0; cur_filter_idx < num_filters; cur_filter_idx++) {
+        H5FD_mpio_chunk_opt_t chunk_opt;
+        H5Z_filter_t          cur_filter = filterIDs[cur_filter_idx];
+
+        /* Run tests with both linked-chunk and multi-chunk I/O */
+        for (chunk_opt = H5FD_MPIO_CHUNK_ONE_IO; chunk_opt <= H5FD_MPIO_CHUNK_MULTI_IO; chunk_opt++) {
+            H5D_alloc_time_t space_alloc_time;
+
+            /* Run tests with all available space allocation times */
+            for (space_alloc_time = H5D_ALLOC_TIME_EARLY; space_alloc_time <= H5D_ALLOC_TIME_INCR;
+                 space_alloc_time++) {
+                const char *alloc_time;
+                unsigned    filter_config;
+                htri_t      filter_avail;
+                size_t      i;
+                char        group_name[512];
+
+                switch (space_alloc_time) {
+                    case H5D_ALLOC_TIME_EARLY:
+                        alloc_time = "Early";
+                        break;
+                    case H5D_ALLOC_TIME_LATE:
+                        alloc_time = "Late";
+                        break;
+                    case H5D_ALLOC_TIME_INCR:
+                        alloc_time = "Incremental";
+                        break;
+                    default:
+                        alloc_time = "Unknown";
+                }
+
+                if (MAINPROCESS)
+                    HDprintf("== Running tests with filter '%s' using '%s' and '%s' allocation time ==\n\n",
+                             filterNames[cur_filter_idx],
+                             H5FD_MPIO_CHUNK_ONE_IO == chunk_opt ? "Linked-Chunk I/O" : "Multi-Chunk I/O",
+                             alloc_time);
+
+                /* Make sure current filter is available before testing with it */
+                filter_avail = H5Zfilter_avail(cur_filter);
+                VRFY((filter_avail >= 0), "H5Zfilter_avail succeeded");
+
+                if (!filter_avail) {
+                    if (MAINPROCESS)
+                        HDprintf(" ** SKIPPED tests with filter '%s' - filter unavailable **\n\n",
+                                 filterNames[cur_filter_idx]);
+                    continue;
+                }
+
+                /* Get the current filter's info */
+                VRFY((H5Zget_filter_info(cur_filter, &filter_config) >= 0), "H5Zget_filter_info succeeded");
+
+                /* Determine if filter is encode-enabled */
+                if (0 == (filter_config & H5Z_FILTER_CONFIG_ENCODE_ENABLED)) {
+                    if (MAINPROCESS)
+                        HDprintf(" ** SKIPPED tests with filter '%s' - filter not encode-enabled **\n\n",
+                                 filterNames[cur_filter_idx]);
+                    continue;
+                }
+
+                /* Set space allocation time */
+                VRFY((H5Pset_alloc_time(dcpl_id, space_alloc_time) >= 0), "H5Pset_alloc_time succeeded");
+
+                /* Set chunk I/O optimization method */
+                VRFY((H5Pset_dxpl_mpio_chunk_opt(dxpl_id, chunk_opt) >= 0),
+                     "H5Pset_dxpl_mpio_chunk_opt succeeded");
+
+                /* Create a group to hold all the datasets for this combination
+                 * of filter, chunk optimization mode and space allocation time.
+                 * Then, close the file again since some tests may need to open
+                 * the file in a special way, like on rank 0 only */
+                file_id = H5Fopen(filenames[0], H5F_ACC_RDWR, fapl_id);
+                VRFY((file_id >= 0), "H5Fopen succeeded");
+
+                HDsnprintf(group_name, sizeof(group_name), "%s_%s_%s", filterNames[cur_filter_idx],
+                           H5FD_MPIO_CHUNK_ONE_IO == chunk_opt ? "linked-chunk-io" : "multi-chunk-io",
+                           alloc_time);
+
+                group_id = H5Gcreate2(file_id, group_name, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
+                VRFY((group_id >= 0), "H5Gcreate2 succeeded");
+
+                VRFY((H5Gclose(group_id) >= 0), "H5Gclose succeeded");
+                group_id = H5I_INVALID_HID;
+
+                VRFY((H5Fclose(file_id) >= 0), "H5Fclose succeeded");
+                file_id = H5I_INVALID_HID;
+
+                for (i = 0; i < ARRAY_SIZE(tests); i++) {
+                    test_func func = tests[i];
+
+                    if (MPI_SUCCESS == (mpi_code = MPI_Barrier(comm))) {
+                        func(group_name, cur_filter, fapl_id, dcpl_id, dxpl_id);
+                    }
+                    else {
+                        if (MAINPROCESS)
+                            MESG("MPI_Barrier failed");
+                        nerrors++;
+                    }
+                }
+
+                if (MAINPROCESS)
+                    HDputs("");
+            }
         }
     }
 
+    VRFY((H5Pclose(dcpl_id) >= 0), "DCPL close succeeded");
+    dcpl_id = H5I_INVALID_HID;
+
+    VRFY((H5Pclose(dxpl_id) >= 0), "DXPL close succeeded");
+    dxpl_id = H5I_INVALID_HID;
+
     if (nerrors)
         goto exit;
 
@@ -6575,7 +8861,21 @@ exit:
 
     TestAlarmOff();
 
-    h5_clean_files(FILENAME, fapl);
+    h5_clean_files(FILENAME, fapl_id);
+    fapl_id = H5I_INVALID_HID;
+
+    if (dcpl_id >= 0)
+        VRFY((H5Pclose(dcpl_id) >= 0), "H5Pclose succeeded");
+    if (dxpl_id >= 0)
+        VRFY((H5Pclose(dxpl_id) >= 0), "H5Pclose succeeded");
+    if (fapl_id >= 0)
+        VRFY((H5Pclose(fapl_id) >= 0), "H5Pclose succeeded");
+    if (fcpl_id >= 0)
+        VRFY((H5Pclose(fcpl_id) >= 0), "H5Pclose succeeded");
+    if (group_id >= 0)
+        VRFY((H5Gclose(group_id) >= 0), "H5Gclose succeeded");
+    if (file_id >= 0)
+        VRFY((H5Fclose(file_id) >= 0), "H5Fclose succeeded");
 
     H5close();
 
diff --git a/testpar/t_filters_parallel.h b/testpar/t_filters_parallel.h
index 7eb34ed..800604c 100644
--- a/testpar/t_filters_parallel.h
+++ b/testpar/t_filters_parallel.h
@@ -30,23 +30,23 @@
 #include "stdlib.h"
 #include "testpar.h"
 
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0])) /* element count of a true array (not a pointer) */
+
 /* Used to load other filters than GZIP */
 /* #define DYNAMIC_FILTER */ /* Uncomment and define the fields below to use a dynamically loaded filter */
+
+#ifdef DYNAMIC_FILTER
 #define FILTER_NUM_CDVALUES 1
 const unsigned int cd_values[FILTER_NUM_CDVALUES] = {0};
-H5Z_filter_t       filter_id;
-unsigned int       flags     = 0;
-size_t             cd_nelmts = FILTER_NUM_CDVALUES;
-
-/* Utility Macros */
-#define STRINGIFY(type) #type
+unsigned int       flags                          = 0;
+size_t             cd_nelmts                      = FILTER_NUM_CDVALUES;
+#endif
 
 /* Common defines for all tests */
-#define C_DATATYPE           long
-#define C_DATATYPE_MPI       MPI_LONG
-#define COMPOUND_C_DATATYPE  cmpd_filtered_t
-#define C_DATATYPE_STR(type) STRINGIFY(type)
-#define HDF5_DATATYPE_NAME   H5T_NATIVE_LONG
+#define C_DATATYPE          long
+#define C_DATATYPE_MPI      MPI_LONG
+#define COMPOUND_C_DATATYPE cmpd_filtered_t
+#define HDF5_DATATYPE_NAME  H5T_NATIVE_LONG
 
 /* Macro used to generate data for datasets for later verification */
 #define GEN_DATA(i) INCREMENTAL_DATA(i)
@@ -59,7 +59,7 @@ size_t             cd_nelmts = FILTER_NUM_CDVALUES;
 #define RANK_DATA(i)                                                                                         \
     (mpi_rank) /* Generates test data to visibly show which rank wrote to which parts of the dataset */
 
-#define DEFAULT_DEFLATE_LEVEL 6
+#define DEFAULT_DEFLATE_LEVEL 9
 
 #define DIM0_SCALE_FACTOR 4
 #define DIM1_SCALE_FACTOR 2
@@ -89,6 +89,14 @@ typedef struct {
 #define WRITE_UNSHARED_FILTERED_CHUNKS_CH_NROWS     (WRITE_UNSHARED_FILTERED_CHUNKS_NROWS / mpi_size)
 #define WRITE_UNSHARED_FILTERED_CHUNKS_CH_NCOLS     (WRITE_UNSHARED_FILTERED_CHUNKS_NCOLS / mpi_size)
 
+/* Defines for the unshared filtered chunks partial write test (mpi_size chunks along each dimension) */
+#define WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_NAME "unshared_filtered_chunks_partial_write"
+#define WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_DATASET_DIMS 2
+#define WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_NROWS        (mpi_size * DIM0_SCALE_FACTOR)
+#define WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_NCOLS        (mpi_size * DIM1_SCALE_FACTOR)
+#define WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NROWS     (DIM0_SCALE_FACTOR)
+#define WRITE_UNSHARED_FILTERED_CHUNKS_PARTIAL_CH_NCOLS     (DIM1_SCALE_FACTOR)
+
 /* Defines for the shared filtered chunks write test */
 #define WRITE_SHARED_FILTERED_CHUNKS_DATASET_NAME "shared_filtered_chunks_write"
 #define WRITE_SHARED_FILTERED_CHUNKS_DATASET_DIMS 2
@@ -97,6 +105,42 @@ typedef struct {
 #define WRITE_SHARED_FILTERED_CHUNKS_NROWS        (WRITE_SHARED_FILTERED_CHUNKS_CH_NROWS * DIM0_SCALE_FACTOR)
 #define WRITE_SHARED_FILTERED_CHUNKS_NCOLS        (WRITE_SHARED_FILTERED_CHUNKS_CH_NCOLS * DIM1_SCALE_FACTOR)
 
+/* Defines for the unshared filtered chunks write test with a single unlimited dimension */
+#define WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_NAME "unshared_filtered_chunks_single_unlim_dim_write"
+#define WRITE_UNSHARED_ONE_UNLIM_DIM_DATASET_DIMS 2
+#define WRITE_UNSHARED_ONE_UNLIM_DIM_NROWS        (mpi_size * DIM0_SCALE_FACTOR)
+#define WRITE_UNSHARED_ONE_UNLIM_DIM_NCOLS        (mpi_size * DIM1_SCALE_FACTOR)
+#define WRITE_UNSHARED_ONE_UNLIM_DIM_CH_NROWS     (WRITE_UNSHARED_ONE_UNLIM_DIM_NROWS / mpi_size)
+#define WRITE_UNSHARED_ONE_UNLIM_DIM_CH_NCOLS     (WRITE_UNSHARED_ONE_UNLIM_DIM_NCOLS / mpi_size)
+#define WRITE_UNSHARED_ONE_UNLIM_DIM_NLOOPS       5
+
+/* Defines for the shared filtered chunks write test with a single unlimited dimension */
+#define WRITE_SHARED_ONE_UNLIM_DIM_DATASET_NAME "shared_filtered_chunks_single_unlim_dim_write"
+#define WRITE_SHARED_ONE_UNLIM_DIM_DATASET_DIMS 2
+#define WRITE_SHARED_ONE_UNLIM_DIM_CH_NROWS     (mpi_size)
+#define WRITE_SHARED_ONE_UNLIM_DIM_CH_NCOLS     (mpi_size)
+#define WRITE_SHARED_ONE_UNLIM_DIM_NROWS        (WRITE_SHARED_ONE_UNLIM_DIM_CH_NROWS * DIM0_SCALE_FACTOR)
+#define WRITE_SHARED_ONE_UNLIM_DIM_NCOLS        (WRITE_SHARED_ONE_UNLIM_DIM_CH_NCOLS * DIM1_SCALE_FACTOR)
+#define WRITE_SHARED_ONE_UNLIM_DIM_NLOOPS       5
+
+/* Defines for the unshared filtered chunks write test with two unlimited dimensions */
+#define WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_NAME "unshared_filtered_chunks_two_unlim_dim_write"
+#define WRITE_UNSHARED_TWO_UNLIM_DIM_DATASET_DIMS 2
+#define WRITE_UNSHARED_TWO_UNLIM_DIM_NROWS        (mpi_size * DIM0_SCALE_FACTOR)
+#define WRITE_UNSHARED_TWO_UNLIM_DIM_NCOLS        (DIM1_SCALE_FACTOR)
+#define WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NROWS     (DIM0_SCALE_FACTOR)
+#define WRITE_UNSHARED_TWO_UNLIM_DIM_CH_NCOLS     (DIM1_SCALE_FACTOR)
+#define WRITE_UNSHARED_TWO_UNLIM_DIM_NLOOPS       5
+
+/* Defines for the shared filtered chunks write test with two unlimited dimensions */
+#define WRITE_SHARED_TWO_UNLIM_DIM_DATASET_NAME "shared_filtered_chunks_two_unlim_dim_write"
+#define WRITE_SHARED_TWO_UNLIM_DIM_DATASET_DIMS 2
+#define WRITE_SHARED_TWO_UNLIM_DIM_CH_NROWS     (mpi_size)
+#define WRITE_SHARED_TWO_UNLIM_DIM_CH_NCOLS     (mpi_size)
+#define WRITE_SHARED_TWO_UNLIM_DIM_NROWS        (mpi_size)
+#define WRITE_SHARED_TWO_UNLIM_DIM_NCOLS        (mpi_size)
+#define WRITE_SHARED_TWO_UNLIM_DIM_NLOOPS       5
+
 /* Defines for the filtered chunks write test where a process has no selection */
 #define WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_NAME "single_no_selection_filtered_chunks_write"
 #define WRITE_SINGLE_NO_SELECTION_FILTERED_CHUNKS_DATASET_DIMS 2
@@ -403,4 +447,53 @@ typedef struct {
 #define SHRINKING_GROWING_CHUNKS_CH_NCOLS     (SHRINKING_GROWING_CHUNKS_NCOLS / mpi_size)
 #define SHRINKING_GROWING_CHUNKS_NLOOPS       20
 
+/* Defines for the unshared filtered edge chunks write test; NCOLS adds CH_NCOLS - 1 edge columns */
+#define WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME  "unshared_filtered_edge_chunks_write"
+#define WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME2 "unshared_filtered_edge_chunks_no_filter_write"
+#define WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS  2
+#define WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NROWS      (DIM0_SCALE_FACTOR)
+#define WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS      (DIM1_SCALE_FACTOR)
+#define WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_NROWS         (mpi_size * DIM0_SCALE_FACTOR)
+#define WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_NCOLS                                                            \
+    ((mpi_size * DIM1_SCALE_FACTOR) + (WRITE_UNSHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS - 1))
+
+/* Defines for the shared filtered edge chunks write test; NCOLS adds CH_NCOLS - 1 edge columns */
+#define WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME  "shared_filtered_edge_chunks_write"
+#define WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_NAME2 "shared_filtered_edge_chunks_no_filter_write"
+#define WRITE_SHARED_FILTERED_EDGE_CHUNKS_DATASET_DIMS  2
+#define WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NROWS      (mpi_size)
+#define WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS      (mpi_size)
+#define WRITE_SHARED_FILTERED_EDGE_CHUNKS_NROWS                                                              \
+    (WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NROWS * DIM0_SCALE_FACTOR)
+#define WRITE_SHARED_FILTERED_EDGE_CHUNKS_NCOLS                                                              \
+    ((WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS * DIM1_SCALE_FACTOR) +                                      \
+     (WRITE_SHARED_FILTERED_EDGE_CHUNKS_CH_NCOLS - 1))
+
+/* Defines for the fill values test (chunks are mpi_size rows by mpi_size + 1 columns) */
+#define FILL_VALUES_TEST_DATASET_NAME  "fill_value_test"
+#define FILL_VALUES_TEST_DATASET_NAME2 "fill_value_alloc_test"
+#define FILL_VALUES_TEST_DATASET_DIMS  2
+#define FILL_VALUES_TEST_FILL_VAL      (-1)
+#define FILL_VALUES_TEST_CH_NROWS      (mpi_size)
+#define FILL_VALUES_TEST_CH_NCOLS      (mpi_size + 1)
+#define FILL_VALUES_TEST_NROWS         (FILL_VALUES_TEST_CH_NROWS * DIM0_SCALE_FACTOR)
+#define FILL_VALUES_TEST_NCOLS         (FILL_VALUES_TEST_CH_NCOLS * DIM1_SCALE_FACTOR)
+
+/* Defines for the undefined fill value test (chunks are mpi_size rows by mpi_size + 1 columns) */
+#define FILL_VALUE_UNDEFINED_TEST_DATASET_NAME "fill_value_undefined_test"
+#define FILL_VALUE_UNDEFINED_TEST_DATASET_DIMS 2
+#define FILL_VALUE_UNDEFINED_TEST_CH_NROWS     (mpi_size)
+#define FILL_VALUE_UNDEFINED_TEST_CH_NCOLS     (mpi_size + 1)
+#define FILL_VALUE_UNDEFINED_TEST_NROWS        (FILL_VALUE_UNDEFINED_TEST_CH_NROWS * DIM0_SCALE_FACTOR)
+#define FILL_VALUE_UNDEFINED_TEST_NCOLS        (FILL_VALUE_UNDEFINED_TEST_CH_NCOLS * DIM1_SCALE_FACTOR)
+
+/* Defines for the fill time of 'never' test (chunks are mpi_size rows by mpi_size + 1 columns) */
+#define FILL_TIME_NEVER_TEST_DATASET_NAME "fill_time_never_test"
+#define FILL_TIME_NEVER_TEST_DATASET_DIMS 2
+#define FILL_TIME_NEVER_TEST_FILL_VAL     (-1)
+#define FILL_TIME_NEVER_TEST_CH_NROWS     (mpi_size)
+#define FILL_TIME_NEVER_TEST_CH_NCOLS     (mpi_size + 1)
+#define FILL_TIME_NEVER_TEST_NROWS        (FILL_TIME_NEVER_TEST_CH_NROWS * DIM0_SCALE_FACTOR)
+#define FILL_TIME_NEVER_TEST_NCOLS        (FILL_TIME_NEVER_TEST_CH_NCOLS * DIM1_SCALE_FACTOR)
+
 #endif /* TEST_PARALLEL_FILTERS_H_ */
diff --git a/testpar/testphdf5.h b/testpar/testphdf5.h
index c692287..16f45d3 100644
--- a/testpar/testphdf5.h
+++ b/testpar/testphdf5.h
@@ -186,10 +186,6 @@ enum H5TEST_COLL_CHUNK_API {
 #define TEST_NOT_SIMPLE_OR_SCALAR_DATASPACES            0x010
 #define TEST_NOT_CONTIGUOUS_OR_CHUNKED_DATASET_COMPACT  0x020
 #define TEST_NOT_CONTIGUOUS_OR_CHUNKED_DATASET_EXTERNAL 0x040
-#define TEST_FILTERS                                    0x080
-/* TEST_FILTERS will take place of this after supporting mpio + filter for
- * H5Dcreate and H5Dwrite */
-#define TEST_FILTERS_READ 0x100
 
 /* Don't erase these lines, they are put here for debugging purposes */
 /*
-- 
cgit v0.12