Diffstat (limited to 'src')
-rw-r--r--  src/H5Distore.c   | 464
-rw-r--r--  src/H5F.c         |   1
-rw-r--r--  src/H5Fistore.c   | 464
-rw-r--r--  src/H5Fprivate.h  |   9
-rw-r--r--  src/H5P.c         |  44
-rw-r--r--  src/H5Ppublic.h   |   7
6 files changed, 536 insertions, 453 deletions
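Before the hunks themselves, a brief illustrative sketch of the chunk-cache scheme this commit introduces may help. The cache becomes a hash table of entry pointers (one slot per rdcc_nelmts element) indexed by a shift-and-xor of the storage B-tree address and the chunk's N-dimensional offset; colliding entries are preempted rather than chained, and every entry also sits on a doubly-linked list that is pruned from the head while new entries are appended at the tail. The following is a minimal, self-contained sketch under those assumptions, not the patch code: the names ent_t, cache_t, hash_chunk, insert_chunk and the fixed rank NDIMS are invented for the example.

#include <stdint.h>
#include <stdlib.h>

#define NDIMS 3                          /* fixed rank, for the sketch only */

typedef struct ent_t {
    uint64_t       btree_addr;           /* address of the chunk B-tree      */
    size_t         offset[NDIMS];        /* chunk offset within the dataset  */
    int            idx;                  /* hash slot this entry occupies    */
    struct ent_t  *prev, *next;          /* doubly-linked list links         */
} ent_t;

typedef struct {
    int      nslots;                     /* rdcc_nelmts: number of hash slots */
    ent_t  **slot;                       /* hash table of entry pointers      */
    ent_t   *head, *tail;                /* prune from head, append at tail   */
} cache_t;

/* Hash a chunk from the B-tree address and its N-dimensional offset,
 * mirroring the shift-and-xor index computed in H5F_istore_lock(). */
static int
hash_chunk(const cache_t *c, uint64_t btree_addr, const size_t offset[])
{
    uint64_t idx = btree_addr;
    int      i;

    for (i = 0; i < NDIMS; i++)
        idx = (idx << 1) ^ (uint64_t)offset[i];
    return (int)(idx % (uint64_t)c->nslots);
}

/* Insert an entry.  Collisions are not resolved: whatever entry already
 * occupies the slot must be preempted first (the real code flushes, unlinks
 * and frees it -- elided here), then the newcomer goes to the list tail. */
static void
insert_chunk(cache_t *c, ent_t *ent)
{
    int idx = hash_chunk(c, ent->btree_addr, ent->offset);

    if (c->slot[idx]) {
        /* preempt the resident entry here (flush + unlink + free) */
    }
    c->slot[idx] = ent;
    ent->idx  = idx;
    ent->next = NULL;
    ent->prev = c->tail;
    if (c->tail) c->tail->next = ent;
    else         c->head = ent;
    c->tail = ent;
}

The patch additionally flushes an evicted entry to disk before freeing it, and shifts a re-referenced entry one position toward the tail of the list; see H5F_istore_preempt() and H5F_istore_lock() in the hunks below.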
diff --git a/src/H5Distore.c b/src/H5Distore.c index dcd1d35..278bca9 100644 --- a/src/H5Distore.c +++ b/src/H5Distore.c @@ -13,6 +13,21 @@ * chunk index to disk address. Each chunk can be compressed * independently and the chunks may move around in the file as * their storage requirements change. + * + * Cache: Disk I/O is performed in units of chunks and H5MF_alloc() + * contains code to optionally align chunks on disk block + * boundaries for performance. + * + * The chunk cache is an extendible hash indexed by a function + * of storage B-tree address and chunk N-dimensional offset + * within the dataset. Collisions are not resolved -- one of + * the two chunks competing for the hash slot must be preempted + * from the cache. All entries in the hash also participate in + * a doubly-linked list and entries are penalized by moving them + * toward the front of the list. When a new chunk is about to + * be added to the cache the heap is pruned by preempting + * entries near the front of the list to make room for the new + * entry which is added to the end of the list. */ #include <H5private.h> #include <H5Dprivate.h> @@ -23,6 +38,31 @@ #include <H5Oprivate.h> #include <H5Vprivate.h> +/* + * Feature: If this constant is defined then every cache preemption and load + * causes a character to be printed on the standard error stream: + * + * `.': Entry was preempted because it has been completely read or + * completely written but not partially read and not partially + * written. This is often a good reason for preemption because such + * a chunk will be unlikely to be referenced in the near future. + * + * `:': Entry was preempted because it hasn't been used recently. + * + * `#': Entry was preempted because another chunk collided with it. This + * is usually a relatively bad thing. If there are too many of + * these then the number of entries in the cache can be increased. + * + * c: Entry was preempted because the file is closing. + * + * w: A chunk read operation was eliminated because the library is + * about to write new values to the entire chunk. This is a good + * thing, especially on files where the chunk size is the same as + * the disk block size, chunks are aligned on disk block boundaries, + * and the operating system can also eliminate a read operation. 
+ */ +/* #define H5F_ISTORE_DEBUG */ + /* Interface initialization */ #define PABLO_MASK H5F_istore_mask static hbool_t interface_initialize_g = FALSE; @@ -40,6 +80,9 @@ typedef struct H5F_rdcc_ent_t { size_t chunk_size; /*size of a chunk */ size_t alloc_size; /*amount allocated for the chunk */ uint8 *chunk; /*the unfiltered chunk data */ + intn idx; /*index in hash table */ + struct H5F_rdcc_ent_t *next;/*next item in doubly-linked list */ + struct H5F_rdcc_ent_t *prev;/*previous item in doubly-linked list */ } H5F_rdcc_ent_t; /* Private prototypes */ @@ -688,9 +731,10 @@ H5F_istore_init (H5F_t *f) FUNC_ENTER (H5F_istore_init, FAIL); HDmemset (rdcc, 0, sizeof(H5F_rdcc_t)); - if (f->shared->access_parms->rdcc_nbytes>0) { - rdcc->nslots = 25; /*some initial number of slots*/ - rdcc->slot = H5MM_calloc (rdcc->nslots*sizeof(H5F_rdcc_ent_t)); + if (f->shared->access_parms->rdcc_nbytes>0 && + f->shared->access_parms->rdcc_nelmts>0) { + rdcc->nslots = f->shared->access_parms->rdcc_nelmts; + rdcc->slot = H5MM_calloc (rdcc->nslots*sizeof(H5F_rdcc_ent_t*)); if (NULL==rdcc->slot) { HRETURN_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed"); @@ -797,7 +841,7 @@ H5F_istore_flush_entry (H5F_t *f, H5F_rdcc_ent_t *ent, hbool_t reset) f->shared->rdcc.nflushes++; } - /* Reset */ + /* Reset, but do not free or removed from list */ if (reset) { point_of_no_return = FALSE; ent->layout = H5O_free(H5O_LAYOUT, ent->layout); @@ -815,7 +859,8 @@ H5F_istore_flush_entry (H5F_t *f, H5F_rdcc_ent_t *ent, hbool_t reset) /* * If we reached the point of no return then we have no choice but to * reset the entry. This can only happen if RESET is true but the - * output pipeline failed. + * output pipeline failed. Do not free the entry or remove it from the + * list. 
*/ if (ret_value<0 && point_of_no_return) { ent->layout = H5O_free(H5O_LAYOUT, ent->layout); @@ -846,25 +891,17 @@ H5F_istore_flush_entry (H5F_t *f, H5F_rdcc_ent_t *ent, hbool_t reset) herr_t H5F_istore_flush (H5F_t *f) { - H5F_rdcc_t *rdcc = &(f->shared->rdcc); - intn i, nerrors=0; + H5F_rdcc_t *rdcc = &(f->shared->rdcc); + intn nerrors=0; + H5F_rdcc_ent_t *ent=NULL; FUNC_ENTER (H5F_istore_flush, FAIL); -#ifdef H5F_RDCC_NEW - for (i=0; i<rdcc->nslots; i++) { - if (rdcc->slot[i].chunk && - H5F_istore_flush_entry(f, rdcc->slot+i, FALSE)<0) { - nerrors++; - } - } -#else - for (i=0; i<rdcc->nused; i++) { - if (H5F_istore_flush_entry (f, rdcc->slot+i, FALSE)<0) { + for (ent=rdcc->head; ent; ent=ent->next) { + if (H5F_istore_flush_entry(f, ent, FALSE)<0) { nerrors++; } } -#endif if (nerrors) { HRETURN_ERROR (H5E_IO, H5E_CANTFLUSH, FAIL, "unable to flush one or more raw data chunks"); @@ -890,29 +927,41 @@ H5F_istore_flush (H5F_t *f) *------------------------------------------------------------------------- */ static herr_t -H5F_istore_preempt (H5F_t *f, intn idx) +H5F_istore_preempt (H5F_t *f, H5F_rdcc_ent_t *ent) { H5F_rdcc_t *rdcc = &(f->shared->rdcc); - H5F_rdcc_ent_t *ent = rdcc->slot + idx; FUNC_ENTER (H5F_istore_preempt, FAIL); -#ifdef H5F_RDCC_NEW - assert(idx>=0 && idx<rdcc->nslots); - assert (!ent->locked); + assert(f); + assert(ent); + assert(!ent->locked); + assert(ent->idx>=0 && ent->idx<rdcc->nslots); + /* Flush */ H5F_istore_flush_entry(f, ent, TRUE); - rdcc->nbytes -= ent->chunk_size; -#else - assert (idx>=0 && idx<rdcc->nused); - assert (!ent->locked); - H5F_istore_flush_entry (f, ent, TRUE); - HDmemmove (rdcc->slot+idx, rdcc->slot+idx+1, - (rdcc->nused-(idx+1)) * sizeof(H5F_rdcc_ent_t)); - rdcc->nused -= 1; + /* Unlink from list */ + if (ent->prev) { + ent->prev->next = ent->next; + } else { + rdcc->head = ent->next; + } + if (ent->next) { + ent->next->prev = ent->prev; + } else { + rdcc->tail = ent->prev; + } + ent->prev = ent->next = NULL; + + /* Remove from cache */ + rdcc->slot[ent->idx] = NULL; + ent->idx = -1; rdcc->nbytes -= ent->chunk_size; -#endif + --rdcc->nused; + + /* Free */ + H5MM_xfree(ent); FUNC_LEAVE (SUCCEED); } @@ -938,25 +987,22 @@ H5F_istore_preempt (H5F_t *f, intn idx) herr_t H5F_istore_dest (H5F_t *f) { - H5F_rdcc_t *rdcc = &(f->shared->rdcc); - intn i, nerrors=0; + H5F_rdcc_t *rdcc = &(f->shared->rdcc); + intn nerrors=0; + H5F_rdcc_ent_t *ent=NULL, *next=NULL; FUNC_ENTER (H5F_istore_dest, FAIL); -#ifdef H5F_RDCC_NEW - for (i=0; i<rdcc->nslots; i++) { - if (rdcc->slot[i].chunk && - H5F_istore_preempt(f, i)<0) { - nerrors++; - } - } -#else - for (i=rdcc->nused-1; i>=0; --i) { - if (H5F_istore_preempt(f, i)<0) { + for (ent=rdcc->head; ent; ent=next) { +#ifdef H5F_ISTORE_DEBUG + fputc('c', stderr); + fflush(stderr); +#endif + next = ent->next; + if (H5F_istore_preempt(f, ent)<0) { nerrors++; } } -#endif if (nerrors) { HRETURN_ERROR (H5E_IO, H5E_CANTFLUSH, FAIL, "unable to flush one or more raw data chunks"); @@ -989,71 +1035,89 @@ H5F_istore_dest (H5F_t *f) static herr_t H5F_istore_prune (H5F_t *f, size_t size) { -#ifdef H5F_RDCC_NEW - intn i, nerrors=0; - static intn place=0; - H5F_rdcc_t *rdcc = &(f->shared->rdcc); - H5F_rdcc_ent_t *ent = NULL; - size_t total = f->shared->access_parms->rdcc_nbytes; -#else - intn i, meth0, meth1, nerrors=0; + intn i, j, nerrors=0; H5F_rdcc_t *rdcc = &(f->shared->rdcc); - H5F_rdcc_ent_t *ent0, *ent1; - double w0 = f->shared->access_parms->rdcc_w0; size_t total = f->shared->access_parms->rdcc_nbytes; -#endif - + const int 
nmeth=2; /*number of methods */ + intn w[1]; /*weighting as an interval */ + H5F_rdcc_ent_t *p[2], *cur; /*list pointers */ + H5F_rdcc_ent_t *n[2]; /*list next pointers */ + FUNC_ENTER (H5F_istore_prune, FAIL); -#ifdef H5F_RDCC_NEW - for (i=0; rdcc->nbytes+size>total && i<rdcc->nslots; i++, place++) { - if (place>=rdcc->nslots) place = 0; - ent = rdcc->slot+place; - if (ent->chunk && !ent->locked) { - if (H5F_istore_preempt(f, place)<0) nerrors++; - } - } -#else /* - * We have two pointers that slide down the cache beginning at the least - * recently used entry. The distance between the pointers represents the - * relative weight. A weight of 50% for the first pointer means that the - * second pointer is half the cache length behind the first pointer. + * Preemption is accomplished by having multiple pointers (currently two) + * slide down the list beginning at the head. Pointer p(N+1) will start + * traversing the list when pointer pN reaches wN percent of the original + * list. In other words, preemption method N gets to consider entries in + * approximate least recently used order w0 percent before method N+1 + * where 100% means tha method N will run to completion before method N+1 + * begins. The pointers participating in the list traversal are each + * given a chance at preemption before any of the pointers are advanced. */ - meth0 = rdcc->nused; - meth1 = rdcc->nused * (1.0+w0); - for (i=MAX(meth0, meth1)-1; - rdcc->nbytes+size>total && i>=0; - --i, --meth0, --meth1) { - - ent0 = rdcc->slot+meth0; /*might be a bad pointer!*/ - ent1 = rdcc->slot+meth1; /*might be a bad pointer!*/ + w[0] = rdcc->nused * f->shared->access_parms->rdcc_w0; + p[0] = rdcc->head; + p[1] = NULL; + + while ((p[0] || p[1]) && rdcc->nbytes+size>total) { + + /* Introduce new pointers */ + for (i=0; i<nmeth-1; i++) if (0==w[i]) p[i+1] = rdcc->head; - if (meth0>=0 && meth0<rdcc->nused && !ent0->locked && - (0==ent0->rd_count || ent0->chunk_size==ent0->rd_count) && - (0==ent0->wr_count || ent0->chunk_size==ent0->wr_count)) { - /* - * Method 0: Preempt entries that have a zero rd_count. If the - * application is accessing a dataset with a set of - * non-overlapping partial I/O requests then chunks with a zero - * rd_count will probably not be accessed in the near future. - */ - if (H5F_istore_preempt (f, meth0)<0) nerrors++; - - } else if (meth1>=0 && meth1<rdcc->nused && !ent1->locked) { - /* - * Method 1: Discard the least recently used members from the - * cache. This is a catch-all. - */ - if (H5F_istore_preempt (f, meth1)<0) nerrors++; + /* Compute next value for each pointer */ + for (i=0; i<nmeth; i++) n[i] = p[i] ? p[i]->next : NULL; + + /* Give each method a chance */ + for (i=0; i<nmeth && rdcc->nbytes+size>total; i++) { + if (0==i && p[0] && !p[0]->locked && + ((0==p[0]->rd_count && 0==p[0]->wr_count) || + (0==p[0]->rd_count && p[0]->chunk_size==p[0]->wr_count) || + (p[0]->chunk_size==p[0]->rd_count && 0==p[0]->wr_count))) { + /* + * Method 0: Preempt entries that have been completely written + * and/or completely read but not entries that are partially + * written or partially read. + */ + cur = p[0]; +#ifdef H5F_ISTORE_DEBUG + putc('.', stderr); + fflush(stderr); +#endif + + } else if (1==i && p[1] && !p[1]->locked) { + /* + * Method 1: Preempt the entry without regard to + * considerations other than being locked. This is the last + * resort preemption. 
+ */ + cur = p[1]; +#ifdef H5F_ISTORE_DEBUG + putc(':', stderr); + fflush(stderr); +#endif + + } else { + /* Nothing to preempt at this point */ + cur= NULL; + } + + if (cur) { + if (H5F_istore_preempt(f, cur)<0) nerrors++; + for (j=0; j<nmeth; j++) { + if (p[j]==cur) p[j] = NULL; + } + } } + + /* Advance pointers */ + for (i=0; i<nmeth; i++) p[i] = n[i]; + for (i=0; i<nmeth-1; i++) w[i] -= 1; } -#endif + if (nerrors) { HRETURN_ERROR (H5E_IO, H5E_CANTFLUSH, FAIL, "unable to preempt one or more raw data cache entry"); } - FUNC_LEAVE (SUCCEED); } @@ -1092,12 +1156,11 @@ H5F_istore_lock (H5F_t *f, const H5O_layout_t *layout, const H5O_pline_t *pline, const hssize_t offset[], hbool_t relax, intn *idx_hint/*in,out*/) { -#ifdef H5F_RDCC_NEW - uintn idx; -#endif + uintn idx; /*hash index number */ + hbool_t found = FALSE; /*already in cache? */ H5F_rdcc_t *rdcc = &(f->shared->rdcc);/*raw data chunk cache*/ H5F_rdcc_ent_t *ent = NULL; /*cache entry */ - intn i, j, found = -1; /*counters */ + intn i; /*counters */ H5F_istore_ud1_t udata; /*B-tree pass-through */ size_t chunk_size=0; /*size of a chunk */ size_t chunk_alloc=0; /*allocated chunk size */ @@ -1107,61 +1170,41 @@ H5F_istore_lock (H5F_t *f, const H5O_layout_t *layout, FUNC_ENTER (H5F_istore_lock, NULL); -#ifdef H5F_RDCC_NEW if (rdcc->nslots>0) { idx = layout->addr.offset; - for (i=0; i<layout->ndims; i++) idx ^= offset[i]; + for (i=0; i<layout->ndims; i++) idx = (idx<<1) ^ offset[i]; idx %= rdcc->nslots; - ent = rdcc->slot + idx; + ent = rdcc->slot[idx]; - if (ent->chunk && + if (ent && layout->ndims==ent->layout->ndims && H5F_addr_eq(&(layout->addr), &(ent->layout->addr))) { - for (i=0, found=idx; i<ent->layout->ndims; i++) { + for (i=0, found=TRUE; i<ent->layout->ndims; i++) { if (offset[i]!=ent->offset[i]) { - found = -1; + found = FALSE; break; } } } } -#else - /* First use the hint because that's O(1) */ - if (idx_hint && *idx_hint>=0 && *idx_hint<rdcc->nused) { - ent = rdcc->slot + *idx_hint; - if (layout->ndims==ent->layout->ndims && - H5F_addr_eq(&(layout->addr), &(ent->layout->addr))) { - for (i=0, found=*idx_hint; found>=0 && i<ent->layout->ndims; i++) { - if (offset[i]!=ent->offset[i]) found = -1; - } - } - } - - /* If the hint is wrong then search the cache, O(n) */ - for (i=0; found<0 && i<rdcc->nused; i++) { - ent = rdcc->slot + i; - if (layout->ndims==ent->layout->ndims && - H5F_addr_eq(&(layout->addr), &(ent->layout->addr))) { - for (j=0, found=i; found>=0 && j<ent->layout->ndims; j++) { - if (offset[j]!=ent->offset[j]) found = -1; - } - } - } -#endif - if (found>=0) { + if (found) { /* * Already in the cache. Count a hit. */ rdcc->nhits++; - - } else if (found<0 && relax) { + + } else if (!found && relax) { /* * Not in the cache, but we're about to overwrite the whole thing * anyway, so just allocate a buffer for it but don't initialize that * buffer with the file contents. Count this as a hit instead of a * miss because we saved ourselves lots of work. 
*/ +#ifdef H5F_ISTORE_DEBUG + putc('w', stderr); + fflush(stderr); +#endif rdcc->nhits++; for (i=0, chunk_size=1; i<layout->ndims; i++) { chunk_size *= layout->dim[i]; @@ -1215,26 +1258,44 @@ H5F_istore_lock (H5F_t *f, const H5O_layout_t *layout, rdcc->ninits++; } } - - assert (found>=0 || chunk_size>0); -#ifdef H5F_RDCC_NEW - if (found<0 && rdcc->nslots>0 && + assert (found || chunk_size>0); + + if (!found && rdcc->nslots>0 && chunk_size<=f->shared->access_parms->rdcc_nbytes && - (NULL==ent->chunk || !ent->locked)) { + (!ent || !ent->locked)) { /* * Add the chunk to the cache only if the slot is not already locked. * Preempt enough things from the cache to make room. */ + if (ent) { +#ifdef H5F_ISTORE_DEBUG + putc('#', stderr); + fflush(stderr); +#endif +#if 0 + HDfprintf(stderr, "\ncollision %3d %10a {", + idx, &(ent->layout->addr)); + for (i=0; i<layout->ndims; i++) { + HDfprintf(stderr, "%s%Zu", i?",":"", ent->offset[i]); + } + HDfprintf(stderr, "}\n %10a {", &(layout->addr)); + for (i=0; i<layout->ndims; i++) { + HDfprintf(stderr, "%s%Zu", i?",":"", offset[i]); + } + fprintf(stderr, "}\n"); +#endif + if (H5F_istore_preempt(f, ent)<0) { + HGOTO_ERROR(H5E_IO, H5E_CANTINIT, NULL, + "unable to preempt chunk from cache"); + } + } if (H5F_istore_prune(f, chunk_size)<0) { HGOTO_ERROR(H5E_IO, H5E_CANTINIT, NULL, "unable to preempt chunk(s) from cache"); } - if (rdcc->slot[idx].chunk && - H5F_istore_preempt(f, idx)<0) { - HGOTO_ERROR(H5E_IO, H5E_CANTINIT, NULL, - "unable to preempt chunk from cache"); - } - ent = rdcc->slot + idx; + + /* Create a new entry */ + ent = H5MM_malloc(sizeof(H5F_rdcc_ent_t)); ent->locked = 0; ent->dirty = FALSE; ent->chunk_size = chunk_size; @@ -1247,83 +1308,67 @@ H5F_istore_lock (H5F_t *f, const H5O_layout_t *layout, ent->rd_count = chunk_size; ent->wr_count = chunk_size; ent->chunk = chunk; - found = idx; - } -#else - if (found<0 && chunk_size<=f->shared->access_parms->rdcc_nbytes) { - /* - * Add the chunk to the beginning of the cache after pruning the cache - * to make room. - */ - if (H5F_istore_prune (f, chunk_size)<0) { - H5E_clear (); - } - if (rdcc->nused>=rdcc->nslots) { - size_t na = MAX (25, 2*rdcc->nslots); - H5F_rdcc_ent_t *x = H5MM_realloc (rdcc->slot, - na*sizeof(H5F_rdcc_ent_t)); - if (NULL==x) { - HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, NULL, - "memory allocation failed"); - } - rdcc->nslots = (intn)na; - rdcc->slot = x; - } - HDmemmove (rdcc->slot+1, rdcc->slot, - rdcc->nused*sizeof(H5F_rdcc_ent_t)); - rdcc->nused++; + + /* Add it to the cache */ + assert(NULL==rdcc->slot[idx]); + rdcc->slot[idx] = ent; + ent->idx = idx; rdcc->nbytes += chunk_size; - ent = rdcc->slot; - ent->locked = 0; - ent->dirty = FALSE; - ent->chunk_size = chunk_size; - ent->alloc_size = chunk_alloc; - ent->layout = H5O_copy (H5O_LAYOUT, layout, NULL); - ent->pline = H5O_copy (H5O_PLINE, pline, NULL); - for (i=0; i<layout->ndims; i++) { - ent->offset[i] = offset[i]; + rdcc->nused++; + + /* Add it to the linked list */ + ent->next = NULL; + if (rdcc->tail) { + rdcc->tail->next = ent; + ent->prev = rdcc->tail; + rdcc->tail = ent; + } else { + rdcc->head = rdcc->tail = ent; + ent->prev = NULL; } - ent->rd_count = chunk_size; - ent->wr_count = chunk_size; - ent->chunk = chunk; - found = 0; - } -#endif - else if (found<0) { + found = TRUE; + + } else if (!found) { /* * The chunk is larger than the entire cache so we don't cache it. * This is the reason all those arguments have to be repeated for the * unlock function. 
*/ ent = NULL; - found = -999; + idx = -999; -#ifndef H5F_RDCC_NEW - } else if (found>0) { + } else if (found) { /* - * The chunk is not at the beginning of the cache; move it forward by - * one slot. This is how we implement the LRU preemption algorithm. + * The chunk is not at the beginning of the cache; move it backward + * by one slot. This is how we implement the LRU preemption + * algorithm. */ - H5F_rdcc_ent_t x = rdcc->slot[found]; - rdcc->slot[found] = rdcc->slot[found-1]; - rdcc->slot[found-1] = x; - ent = rdcc->slot + --found; -#endif + if (ent->next) { + if (ent->next->next) { + ent->next->next->prev = ent; + } else { + rdcc->tail = ent; + } + ent->next->prev = ent->prev; + if (ent->prev) { + ent->prev->next = ent->next; + } else { + rdcc->head = ent->next; + } + ent->prev = ent->next; + ent->next = ent->next->next; + ent->prev->next = ent; + } } /* Lock the chunk into the cache */ if (ent) { assert (!ent->locked); ent->locked = TRUE; -#ifndef H5F_RDCC_NEW - if (idx_hint) *idx_hint = found; -#endif chunk = ent->chunk; } -#ifdef H5F_RDCC_NEW - if (idx_hint) *idx_hint = found; -#endif + if (idx_hint) *idx_hint = idx; ret_value = chunk; done: @@ -1370,25 +1415,14 @@ H5F_istore_unlock (H5F_t *f, const H5O_layout_t *layout, FUNC_ENTER (H5F_istore_unlock, FAIL); -#ifdef H5F_RDCC_NEW if (-999==*idx_hint) { /*not in cache*/ } else { assert(*idx_hint>=0 && *idx_hint<rdcc->nslots); - assert(rdcc->slot[*idx_hint].chunk==chunk); + assert(rdcc->slot[*idx_hint]); + assert(rdcc->slot[*idx_hint]->chunk==chunk); found = *idx_hint; } -#else - /* First look at the hint */ - if (idx_hint && *idx_hint>=0 && *idx_hint<rdcc->nused) { - if (rdcc->slot[*idx_hint].chunk==chunk) found = *idx_hint; - } - - /* Then look at all the entries */ - for (i=0; found<0 && i<rdcc->nused; i++) { - if (rdcc->slot[i].chunk==chunk) found = i; - } -#endif if (found<0) { /* @@ -1417,7 +1451,7 @@ H5F_istore_unlock (H5F_t *f, const H5O_layout_t *layout, /* * It's in the cache so unlock it. */ - ent = rdcc->slot + found; + ent = rdcc->slot[found]; assert (ent->locked); if (dirty) { ent->dirty = TRUE; @@ -159,6 +159,7 @@ H5F_init_interface(void) /* Initialize the default file access property list */ H5F_access_dflt.mdc_nelmts = H5AC_NSLOTS; + H5F_access_dflt.rdcc_nelmts = 521; H5F_access_dflt.rdcc_nbytes = 1024*1024; /*1MB*/ H5F_access_dflt.rdcc_w0 = 0.75; /*preempt fully read chunks*/ H5F_access_dflt.threshold = 1; /*alignment applies to everything*/ diff --git a/src/H5Fistore.c b/src/H5Fistore.c index dcd1d35..278bca9 100644 --- a/src/H5Fistore.c +++ b/src/H5Fistore.c @@ -13,6 +13,21 @@ * chunk index to disk address. Each chunk can be compressed * independently and the chunks may move around in the file as * their storage requirements change. + * + * Cache: Disk I/O is performed in units of chunks and H5MF_alloc() + * contains code to optionally align chunks on disk block + * boundaries for performance. + * + * The chunk cache is an extendible hash indexed by a function + * of storage B-tree address and chunk N-dimensional offset + * within the dataset. Collisions are not resolved -- one of + * the two chunks competing for the hash slot must be preempted + * from the cache. All entries in the hash also participate in + * a doubly-linked list and entries are penalized by moving them + * toward the front of the list. When a new chunk is about to + * be added to the cache the heap is pruned by preempting + * entries near the front of the list to make room for the new + * entry which is added to the end of the list. 
*/ #include <H5private.h> #include <H5Dprivate.h> @@ -23,6 +38,31 @@ #include <H5Oprivate.h> #include <H5Vprivate.h> +/* + * Feature: If this constant is defined then every cache preemption and load + * causes a character to be printed on the standard error stream: + * + * `.': Entry was preempted because it has been completely read or + * completely written but not partially read and not partially + * written. This is often a good reason for preemption because such + * a chunk will be unlikely to be referenced in the near future. + * + * `:': Entry was preempted because it hasn't been used recently. + * + * `#': Entry was preempted because another chunk collided with it. This + * is usually a relatively bad thing. If there are too many of + * these then the number of entries in the cache can be increased. + * + * c: Entry was preempted because the file is closing. + * + * w: A chunk read operation was eliminated because the library is + * about to write new values to the entire chunk. This is a good + * thing, especially on files where the chunk size is the same as + * the disk block size, chunks are aligned on disk block boundaries, + * and the operating system can also eliminate a read operation. + */ +/* #define H5F_ISTORE_DEBUG */ + /* Interface initialization */ #define PABLO_MASK H5F_istore_mask static hbool_t interface_initialize_g = FALSE; @@ -40,6 +80,9 @@ typedef struct H5F_rdcc_ent_t { size_t chunk_size; /*size of a chunk */ size_t alloc_size; /*amount allocated for the chunk */ uint8 *chunk; /*the unfiltered chunk data */ + intn idx; /*index in hash table */ + struct H5F_rdcc_ent_t *next;/*next item in doubly-linked list */ + struct H5F_rdcc_ent_t *prev;/*previous item in doubly-linked list */ } H5F_rdcc_ent_t; /* Private prototypes */ @@ -688,9 +731,10 @@ H5F_istore_init (H5F_t *f) FUNC_ENTER (H5F_istore_init, FAIL); HDmemset (rdcc, 0, sizeof(H5F_rdcc_t)); - if (f->shared->access_parms->rdcc_nbytes>0) { - rdcc->nslots = 25; /*some initial number of slots*/ - rdcc->slot = H5MM_calloc (rdcc->nslots*sizeof(H5F_rdcc_ent_t)); + if (f->shared->access_parms->rdcc_nbytes>0 && + f->shared->access_parms->rdcc_nelmts>0) { + rdcc->nslots = f->shared->access_parms->rdcc_nelmts; + rdcc->slot = H5MM_calloc (rdcc->nslots*sizeof(H5F_rdcc_ent_t*)); if (NULL==rdcc->slot) { HRETURN_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed"); @@ -797,7 +841,7 @@ H5F_istore_flush_entry (H5F_t *f, H5F_rdcc_ent_t *ent, hbool_t reset) f->shared->rdcc.nflushes++; } - /* Reset */ + /* Reset, but do not free or removed from list */ if (reset) { point_of_no_return = FALSE; ent->layout = H5O_free(H5O_LAYOUT, ent->layout); @@ -815,7 +859,8 @@ H5F_istore_flush_entry (H5F_t *f, H5F_rdcc_ent_t *ent, hbool_t reset) /* * If we reached the point of no return then we have no choice but to * reset the entry. This can only happen if RESET is true but the - * output pipeline failed. + * output pipeline failed. Do not free the entry or remove it from the + * list. 
*/ if (ret_value<0 && point_of_no_return) { ent->layout = H5O_free(H5O_LAYOUT, ent->layout); @@ -846,25 +891,17 @@ H5F_istore_flush_entry (H5F_t *f, H5F_rdcc_ent_t *ent, hbool_t reset) herr_t H5F_istore_flush (H5F_t *f) { - H5F_rdcc_t *rdcc = &(f->shared->rdcc); - intn i, nerrors=0; + H5F_rdcc_t *rdcc = &(f->shared->rdcc); + intn nerrors=0; + H5F_rdcc_ent_t *ent=NULL; FUNC_ENTER (H5F_istore_flush, FAIL); -#ifdef H5F_RDCC_NEW - for (i=0; i<rdcc->nslots; i++) { - if (rdcc->slot[i].chunk && - H5F_istore_flush_entry(f, rdcc->slot+i, FALSE)<0) { - nerrors++; - } - } -#else - for (i=0; i<rdcc->nused; i++) { - if (H5F_istore_flush_entry (f, rdcc->slot+i, FALSE)<0) { + for (ent=rdcc->head; ent; ent=ent->next) { + if (H5F_istore_flush_entry(f, ent, FALSE)<0) { nerrors++; } } -#endif if (nerrors) { HRETURN_ERROR (H5E_IO, H5E_CANTFLUSH, FAIL, "unable to flush one or more raw data chunks"); @@ -890,29 +927,41 @@ H5F_istore_flush (H5F_t *f) *------------------------------------------------------------------------- */ static herr_t -H5F_istore_preempt (H5F_t *f, intn idx) +H5F_istore_preempt (H5F_t *f, H5F_rdcc_ent_t *ent) { H5F_rdcc_t *rdcc = &(f->shared->rdcc); - H5F_rdcc_ent_t *ent = rdcc->slot + idx; FUNC_ENTER (H5F_istore_preempt, FAIL); -#ifdef H5F_RDCC_NEW - assert(idx>=0 && idx<rdcc->nslots); - assert (!ent->locked); + assert(f); + assert(ent); + assert(!ent->locked); + assert(ent->idx>=0 && ent->idx<rdcc->nslots); + /* Flush */ H5F_istore_flush_entry(f, ent, TRUE); - rdcc->nbytes -= ent->chunk_size; -#else - assert (idx>=0 && idx<rdcc->nused); - assert (!ent->locked); - H5F_istore_flush_entry (f, ent, TRUE); - HDmemmove (rdcc->slot+idx, rdcc->slot+idx+1, - (rdcc->nused-(idx+1)) * sizeof(H5F_rdcc_ent_t)); - rdcc->nused -= 1; + /* Unlink from list */ + if (ent->prev) { + ent->prev->next = ent->next; + } else { + rdcc->head = ent->next; + } + if (ent->next) { + ent->next->prev = ent->prev; + } else { + rdcc->tail = ent->prev; + } + ent->prev = ent->next = NULL; + + /* Remove from cache */ + rdcc->slot[ent->idx] = NULL; + ent->idx = -1; rdcc->nbytes -= ent->chunk_size; -#endif + --rdcc->nused; + + /* Free */ + H5MM_xfree(ent); FUNC_LEAVE (SUCCEED); } @@ -938,25 +987,22 @@ H5F_istore_preempt (H5F_t *f, intn idx) herr_t H5F_istore_dest (H5F_t *f) { - H5F_rdcc_t *rdcc = &(f->shared->rdcc); - intn i, nerrors=0; + H5F_rdcc_t *rdcc = &(f->shared->rdcc); + intn nerrors=0; + H5F_rdcc_ent_t *ent=NULL, *next=NULL; FUNC_ENTER (H5F_istore_dest, FAIL); -#ifdef H5F_RDCC_NEW - for (i=0; i<rdcc->nslots; i++) { - if (rdcc->slot[i].chunk && - H5F_istore_preempt(f, i)<0) { - nerrors++; - } - } -#else - for (i=rdcc->nused-1; i>=0; --i) { - if (H5F_istore_preempt(f, i)<0) { + for (ent=rdcc->head; ent; ent=next) { +#ifdef H5F_ISTORE_DEBUG + fputc('c', stderr); + fflush(stderr); +#endif + next = ent->next; + if (H5F_istore_preempt(f, ent)<0) { nerrors++; } } -#endif if (nerrors) { HRETURN_ERROR (H5E_IO, H5E_CANTFLUSH, FAIL, "unable to flush one or more raw data chunks"); @@ -989,71 +1035,89 @@ H5F_istore_dest (H5F_t *f) static herr_t H5F_istore_prune (H5F_t *f, size_t size) { -#ifdef H5F_RDCC_NEW - intn i, nerrors=0; - static intn place=0; - H5F_rdcc_t *rdcc = &(f->shared->rdcc); - H5F_rdcc_ent_t *ent = NULL; - size_t total = f->shared->access_parms->rdcc_nbytes; -#else - intn i, meth0, meth1, nerrors=0; + intn i, j, nerrors=0; H5F_rdcc_t *rdcc = &(f->shared->rdcc); - H5F_rdcc_ent_t *ent0, *ent1; - double w0 = f->shared->access_parms->rdcc_w0; size_t total = f->shared->access_parms->rdcc_nbytes; -#endif - + const int 
nmeth=2; /*number of methods */ + intn w[1]; /*weighting as an interval */ + H5F_rdcc_ent_t *p[2], *cur; /*list pointers */ + H5F_rdcc_ent_t *n[2]; /*list next pointers */ + FUNC_ENTER (H5F_istore_prune, FAIL); -#ifdef H5F_RDCC_NEW - for (i=0; rdcc->nbytes+size>total && i<rdcc->nslots; i++, place++) { - if (place>=rdcc->nslots) place = 0; - ent = rdcc->slot+place; - if (ent->chunk && !ent->locked) { - if (H5F_istore_preempt(f, place)<0) nerrors++; - } - } -#else /* - * We have two pointers that slide down the cache beginning at the least - * recently used entry. The distance between the pointers represents the - * relative weight. A weight of 50% for the first pointer means that the - * second pointer is half the cache length behind the first pointer. + * Preemption is accomplished by having multiple pointers (currently two) + * slide down the list beginning at the head. Pointer p(N+1) will start + * traversing the list when pointer pN reaches wN percent of the original + * list. In other words, preemption method N gets to consider entries in + * approximate least recently used order w0 percent before method N+1 + * where 100% means tha method N will run to completion before method N+1 + * begins. The pointers participating in the list traversal are each + * given a chance at preemption before any of the pointers are advanced. */ - meth0 = rdcc->nused; - meth1 = rdcc->nused * (1.0+w0); - for (i=MAX(meth0, meth1)-1; - rdcc->nbytes+size>total && i>=0; - --i, --meth0, --meth1) { - - ent0 = rdcc->slot+meth0; /*might be a bad pointer!*/ - ent1 = rdcc->slot+meth1; /*might be a bad pointer!*/ + w[0] = rdcc->nused * f->shared->access_parms->rdcc_w0; + p[0] = rdcc->head; + p[1] = NULL; + + while ((p[0] || p[1]) && rdcc->nbytes+size>total) { + + /* Introduce new pointers */ + for (i=0; i<nmeth-1; i++) if (0==w[i]) p[i+1] = rdcc->head; - if (meth0>=0 && meth0<rdcc->nused && !ent0->locked && - (0==ent0->rd_count || ent0->chunk_size==ent0->rd_count) && - (0==ent0->wr_count || ent0->chunk_size==ent0->wr_count)) { - /* - * Method 0: Preempt entries that have a zero rd_count. If the - * application is accessing a dataset with a set of - * non-overlapping partial I/O requests then chunks with a zero - * rd_count will probably not be accessed in the near future. - */ - if (H5F_istore_preempt (f, meth0)<0) nerrors++; - - } else if (meth1>=0 && meth1<rdcc->nused && !ent1->locked) { - /* - * Method 1: Discard the least recently used members from the - * cache. This is a catch-all. - */ - if (H5F_istore_preempt (f, meth1)<0) nerrors++; + /* Compute next value for each pointer */ + for (i=0; i<nmeth; i++) n[i] = p[i] ? p[i]->next : NULL; + + /* Give each method a chance */ + for (i=0; i<nmeth && rdcc->nbytes+size>total; i++) { + if (0==i && p[0] && !p[0]->locked && + ((0==p[0]->rd_count && 0==p[0]->wr_count) || + (0==p[0]->rd_count && p[0]->chunk_size==p[0]->wr_count) || + (p[0]->chunk_size==p[0]->rd_count && 0==p[0]->wr_count))) { + /* + * Method 0: Preempt entries that have been completely written + * and/or completely read but not entries that are partially + * written or partially read. + */ + cur = p[0]; +#ifdef H5F_ISTORE_DEBUG + putc('.', stderr); + fflush(stderr); +#endif + + } else if (1==i && p[1] && !p[1]->locked) { + /* + * Method 1: Preempt the entry without regard to + * considerations other than being locked. This is the last + * resort preemption. 
+ */ + cur = p[1]; +#ifdef H5F_ISTORE_DEBUG + putc(':', stderr); + fflush(stderr); +#endif + + } else { + /* Nothing to preempt at this point */ + cur= NULL; + } + + if (cur) { + if (H5F_istore_preempt(f, cur)<0) nerrors++; + for (j=0; j<nmeth; j++) { + if (p[j]==cur) p[j] = NULL; + } + } } + + /* Advance pointers */ + for (i=0; i<nmeth; i++) p[i] = n[i]; + for (i=0; i<nmeth-1; i++) w[i] -= 1; } -#endif + if (nerrors) { HRETURN_ERROR (H5E_IO, H5E_CANTFLUSH, FAIL, "unable to preempt one or more raw data cache entry"); } - FUNC_LEAVE (SUCCEED); } @@ -1092,12 +1156,11 @@ H5F_istore_lock (H5F_t *f, const H5O_layout_t *layout, const H5O_pline_t *pline, const hssize_t offset[], hbool_t relax, intn *idx_hint/*in,out*/) { -#ifdef H5F_RDCC_NEW - uintn idx; -#endif + uintn idx; /*hash index number */ + hbool_t found = FALSE; /*already in cache? */ H5F_rdcc_t *rdcc = &(f->shared->rdcc);/*raw data chunk cache*/ H5F_rdcc_ent_t *ent = NULL; /*cache entry */ - intn i, j, found = -1; /*counters */ + intn i; /*counters */ H5F_istore_ud1_t udata; /*B-tree pass-through */ size_t chunk_size=0; /*size of a chunk */ size_t chunk_alloc=0; /*allocated chunk size */ @@ -1107,61 +1170,41 @@ H5F_istore_lock (H5F_t *f, const H5O_layout_t *layout, FUNC_ENTER (H5F_istore_lock, NULL); -#ifdef H5F_RDCC_NEW if (rdcc->nslots>0) { idx = layout->addr.offset; - for (i=0; i<layout->ndims; i++) idx ^= offset[i]; + for (i=0; i<layout->ndims; i++) idx = (idx<<1) ^ offset[i]; idx %= rdcc->nslots; - ent = rdcc->slot + idx; + ent = rdcc->slot[idx]; - if (ent->chunk && + if (ent && layout->ndims==ent->layout->ndims && H5F_addr_eq(&(layout->addr), &(ent->layout->addr))) { - for (i=0, found=idx; i<ent->layout->ndims; i++) { + for (i=0, found=TRUE; i<ent->layout->ndims; i++) { if (offset[i]!=ent->offset[i]) { - found = -1; + found = FALSE; break; } } } } -#else - /* First use the hint because that's O(1) */ - if (idx_hint && *idx_hint>=0 && *idx_hint<rdcc->nused) { - ent = rdcc->slot + *idx_hint; - if (layout->ndims==ent->layout->ndims && - H5F_addr_eq(&(layout->addr), &(ent->layout->addr))) { - for (i=0, found=*idx_hint; found>=0 && i<ent->layout->ndims; i++) { - if (offset[i]!=ent->offset[i]) found = -1; - } - } - } - - /* If the hint is wrong then search the cache, O(n) */ - for (i=0; found<0 && i<rdcc->nused; i++) { - ent = rdcc->slot + i; - if (layout->ndims==ent->layout->ndims && - H5F_addr_eq(&(layout->addr), &(ent->layout->addr))) { - for (j=0, found=i; found>=0 && j<ent->layout->ndims; j++) { - if (offset[j]!=ent->offset[j]) found = -1; - } - } - } -#endif - if (found>=0) { + if (found) { /* * Already in the cache. Count a hit. */ rdcc->nhits++; - - } else if (found<0 && relax) { + + } else if (!found && relax) { /* * Not in the cache, but we're about to overwrite the whole thing * anyway, so just allocate a buffer for it but don't initialize that * buffer with the file contents. Count this as a hit instead of a * miss because we saved ourselves lots of work. 
*/ +#ifdef H5F_ISTORE_DEBUG + putc('w', stderr); + fflush(stderr); +#endif rdcc->nhits++; for (i=0, chunk_size=1; i<layout->ndims; i++) { chunk_size *= layout->dim[i]; @@ -1215,26 +1258,44 @@ H5F_istore_lock (H5F_t *f, const H5O_layout_t *layout, rdcc->ninits++; } } - - assert (found>=0 || chunk_size>0); -#ifdef H5F_RDCC_NEW - if (found<0 && rdcc->nslots>0 && + assert (found || chunk_size>0); + + if (!found && rdcc->nslots>0 && chunk_size<=f->shared->access_parms->rdcc_nbytes && - (NULL==ent->chunk || !ent->locked)) { + (!ent || !ent->locked)) { /* * Add the chunk to the cache only if the slot is not already locked. * Preempt enough things from the cache to make room. */ + if (ent) { +#ifdef H5F_ISTORE_DEBUG + putc('#', stderr); + fflush(stderr); +#endif +#if 0 + HDfprintf(stderr, "\ncollision %3d %10a {", + idx, &(ent->layout->addr)); + for (i=0; i<layout->ndims; i++) { + HDfprintf(stderr, "%s%Zu", i?",":"", ent->offset[i]); + } + HDfprintf(stderr, "}\n %10a {", &(layout->addr)); + for (i=0; i<layout->ndims; i++) { + HDfprintf(stderr, "%s%Zu", i?",":"", offset[i]); + } + fprintf(stderr, "}\n"); +#endif + if (H5F_istore_preempt(f, ent)<0) { + HGOTO_ERROR(H5E_IO, H5E_CANTINIT, NULL, + "unable to preempt chunk from cache"); + } + } if (H5F_istore_prune(f, chunk_size)<0) { HGOTO_ERROR(H5E_IO, H5E_CANTINIT, NULL, "unable to preempt chunk(s) from cache"); } - if (rdcc->slot[idx].chunk && - H5F_istore_preempt(f, idx)<0) { - HGOTO_ERROR(H5E_IO, H5E_CANTINIT, NULL, - "unable to preempt chunk from cache"); - } - ent = rdcc->slot + idx; + + /* Create a new entry */ + ent = H5MM_malloc(sizeof(H5F_rdcc_ent_t)); ent->locked = 0; ent->dirty = FALSE; ent->chunk_size = chunk_size; @@ -1247,83 +1308,67 @@ H5F_istore_lock (H5F_t *f, const H5O_layout_t *layout, ent->rd_count = chunk_size; ent->wr_count = chunk_size; ent->chunk = chunk; - found = idx; - } -#else - if (found<0 && chunk_size<=f->shared->access_parms->rdcc_nbytes) { - /* - * Add the chunk to the beginning of the cache after pruning the cache - * to make room. - */ - if (H5F_istore_prune (f, chunk_size)<0) { - H5E_clear (); - } - if (rdcc->nused>=rdcc->nslots) { - size_t na = MAX (25, 2*rdcc->nslots); - H5F_rdcc_ent_t *x = H5MM_realloc (rdcc->slot, - na*sizeof(H5F_rdcc_ent_t)); - if (NULL==x) { - HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, NULL, - "memory allocation failed"); - } - rdcc->nslots = (intn)na; - rdcc->slot = x; - } - HDmemmove (rdcc->slot+1, rdcc->slot, - rdcc->nused*sizeof(H5F_rdcc_ent_t)); - rdcc->nused++; + + /* Add it to the cache */ + assert(NULL==rdcc->slot[idx]); + rdcc->slot[idx] = ent; + ent->idx = idx; rdcc->nbytes += chunk_size; - ent = rdcc->slot; - ent->locked = 0; - ent->dirty = FALSE; - ent->chunk_size = chunk_size; - ent->alloc_size = chunk_alloc; - ent->layout = H5O_copy (H5O_LAYOUT, layout, NULL); - ent->pline = H5O_copy (H5O_PLINE, pline, NULL); - for (i=0; i<layout->ndims; i++) { - ent->offset[i] = offset[i]; + rdcc->nused++; + + /* Add it to the linked list */ + ent->next = NULL; + if (rdcc->tail) { + rdcc->tail->next = ent; + ent->prev = rdcc->tail; + rdcc->tail = ent; + } else { + rdcc->head = rdcc->tail = ent; + ent->prev = NULL; } - ent->rd_count = chunk_size; - ent->wr_count = chunk_size; - ent->chunk = chunk; - found = 0; - } -#endif - else if (found<0) { + found = TRUE; + + } else if (!found) { /* * The chunk is larger than the entire cache so we don't cache it. * This is the reason all those arguments have to be repeated for the * unlock function. 
*/ ent = NULL; - found = -999; + idx = -999; -#ifndef H5F_RDCC_NEW - } else if (found>0) { + } else if (found) { /* - * The chunk is not at the beginning of the cache; move it forward by - * one slot. This is how we implement the LRU preemption algorithm. + * The chunk is not at the beginning of the cache; move it backward + * by one slot. This is how we implement the LRU preemption + * algorithm. */ - H5F_rdcc_ent_t x = rdcc->slot[found]; - rdcc->slot[found] = rdcc->slot[found-1]; - rdcc->slot[found-1] = x; - ent = rdcc->slot + --found; -#endif + if (ent->next) { + if (ent->next->next) { + ent->next->next->prev = ent; + } else { + rdcc->tail = ent; + } + ent->next->prev = ent->prev; + if (ent->prev) { + ent->prev->next = ent->next; + } else { + rdcc->head = ent->next; + } + ent->prev = ent->next; + ent->next = ent->next->next; + ent->prev->next = ent; + } } /* Lock the chunk into the cache */ if (ent) { assert (!ent->locked); ent->locked = TRUE; -#ifndef H5F_RDCC_NEW - if (idx_hint) *idx_hint = found; -#endif chunk = ent->chunk; } -#ifdef H5F_RDCC_NEW - if (idx_hint) *idx_hint = found; -#endif + if (idx_hint) *idx_hint = idx; ret_value = chunk; done: @@ -1370,25 +1415,14 @@ H5F_istore_unlock (H5F_t *f, const H5O_layout_t *layout, FUNC_ENTER (H5F_istore_unlock, FAIL); -#ifdef H5F_RDCC_NEW if (-999==*idx_hint) { /*not in cache*/ } else { assert(*idx_hint>=0 && *idx_hint<rdcc->nslots); - assert(rdcc->slot[*idx_hint].chunk==chunk); + assert(rdcc->slot[*idx_hint]); + assert(rdcc->slot[*idx_hint]->chunk==chunk); found = *idx_hint; } -#else - /* First look at the hint */ - if (idx_hint && *idx_hint>=0 && *idx_hint<rdcc->nused) { - if (rdcc->slot[*idx_hint].chunk==chunk) found = *idx_hint; - } - - /* Then look at all the entries */ - for (i=0; found<0 && i<rdcc->nused; i++) { - if (rdcc->slot[i].chunk==chunk) found = i; - } -#endif if (found<0) { /* @@ -1417,7 +1451,7 @@ H5F_istore_unlock (H5F_t *f, const H5O_layout_t *layout, /* * It's in the cache so unlock it. */ - ent = rdcc->slot + found; + ent = rdcc->slot[found]; assert (ent->locked); if (dirty) { ent->dirty = TRUE; diff --git a/src/H5Fprivate.h b/src/H5Fprivate.h index cf92283..6e80fa0 100644 --- a/src/H5Fprivate.h +++ b/src/H5Fprivate.h @@ -239,7 +239,8 @@ typedef struct H5F_create_t { * File-access property list. */ typedef struct H5F_access_t { - intn mdc_nelmts; /* Size of meta data cache (nelmts) */ + intn mdc_nelmts; /* Size of meta data cache (elements) */ + intn rdcc_nelmts; /* Size of raw data chunk cache (elmts) */ size_t rdcc_nbytes; /* Size of raw data chunk cache (bytes) */ double rdcc_w0; /* Preempt read chunks first? [0.0..1.0]*/ hsize_t threshold; /* Threshold for alignment */ @@ -419,10 +420,10 @@ typedef struct H5F_rdcc_t { uintn nflushes;/* Number of cache flushes */ size_t nbytes; /* Current cached raw data in bytes */ intn nslots; /* Number of chunk slots allocated */ -#ifndef H5F_RDCC_NEW + struct H5F_rdcc_ent_t *head; /* Head of doubly linked list */ + struct H5F_rdcc_ent_t *tail; /* Tail of doubly linked list */ intn nused; /* Number of chunk slots in use */ -#endif - struct H5F_rdcc_ent_t *slot; /* Chunk slots, each points to a chunk */ + struct H5F_rdcc_ent_t **slot; /* Chunk slots, each points to a chunk*/ } H5F_rdcc_t; /* @@ -1878,14 +1878,16 @@ H5Pget_family(hid_t plist_id, hsize_t *memb_size/*out*/, * Function: H5Pset_cache * * Purpose: Set the number of objects in the meta data cache and the - * total number of bytes in the raw data chunk cache. 
+ * maximum number of chunks and bytes in the raw data chunk + * cache. * * The RDCC_W0 value should be between 0 and 1 inclusive and - * indicates how much chunks that have been fully read are - * favored for preemption. A value of zero means fully read - * chunks are treated no differently than other chunks (the - * preemption is strictly LRU) while a value of one means fully - * read chunks are always preempted before other chunks. + * indicates how much chunks that have been fully read or fully + * written are favored for preemption. A value of zero means + * fully read or written chunks are treated no differently than + * other chunks (the preemption is strictly LRU) while a value + * of one means fully read chunks are always preempted before + * other chunks. * * Return: Success: SUCCEED * @@ -1899,13 +1901,14 @@ H5Pget_family(hid_t plist_id, hsize_t *memb_size/*out*/, *------------------------------------------------------------------------- */ herr_t -H5Pset_cache(hid_t plist_id, int mdc_nelmts, size_t rdcc_nbytes, - double rdcc_w0) +H5Pset_cache(hid_t plist_id, int mdc_nelmts, + int rdcc_nelmts, size_t rdcc_nbytes, double rdcc_w0) { H5F_access_t *fapl = NULL; FUNC_ENTER (H5Pset_cache, FAIL); - H5TRACE4("e","iIszd",plist_id,mdc_nelmts,rdcc_nbytes,rdcc_w0); + H5TRACE5("e","iIsIszd",plist_id,mdc_nelmts,rdcc_nelmts,rdcc_nbytes, + rdcc_w0); /* Check arguments */ if (H5P_FILE_ACCESS!=H5P_get_class (plist_id) || @@ -1917,6 +1920,10 @@ H5Pset_cache(hid_t plist_id, int mdc_nelmts, size_t rdcc_nbytes, HRETURN_ERROR (H5E_ARGS, H5E_BADVALUE, FAIL, "meta data cache size must be non-negative"); } + if (rdcc_nelmts<0) { + HRETURN_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "raw data chunk cache nelmts must be non-negative"); + } if (rdcc_w0<0.0 || rdcc_w0>1.0) { HRETURN_ERROR (H5E_ARGS, H5E_BADVALUE, FAIL, "raw data cache w0 value must be between 0.0 and 1.0 " @@ -1925,6 +1932,7 @@ H5Pset_cache(hid_t plist_id, int mdc_nelmts, size_t rdcc_nbytes, /* Set sizes */ fapl->mdc_nelmts = mdc_nelmts; + fapl->rdcc_nelmts = rdcc_nelmts; fapl->rdcc_nbytes = rdcc_nbytes; fapl->rdcc_w0 = rdcc_w0; @@ -1936,10 +1944,10 @@ H5Pset_cache(hid_t plist_id, int mdc_nelmts, size_t rdcc_nbytes, * Function: H5Pget_cache * * Purpose: Retrieves the maximum possible number of elements in the meta - * data cache and the maximum possible number of bytes and the - * RDCC_W0 value in the raw data chunk cache. Any (or all) - * arguments may be null pointers in which case the corresponding - * datum is not returned. + * data cache and the maximum possible number of elements and + * bytes and the RDCC_W0 value in the raw data chunk cache. Any + * (or all) arguments may be null pointers in which case the + * corresponding datum is not returned. 
* * Return: Success: SUCCEED * @@ -1953,13 +1961,14 @@ H5Pset_cache(hid_t plist_id, int mdc_nelmts, size_t rdcc_nbytes, *------------------------------------------------------------------------- */ herr_t -H5Pget_cache(hid_t plist_id, int *mdc_nelmts, size_t *rdcc_nbytes, - double *rdcc_w0) +H5Pget_cache(hid_t plist_id, int *mdc_nelmts, + int *rdcc_nelmts, size_t *rdcc_nbytes, double *rdcc_w0) { H5F_access_t *fapl = NULL; FUNC_ENTER (H5Pget_cache, FAIL); - H5TRACE4("e","i*Is*z*d",plist_id,mdc_nelmts,rdcc_nbytes,rdcc_w0); + H5TRACE5("e","i*Is*Is*z*d",plist_id,mdc_nelmts,rdcc_nelmts,rdcc_nbytes, + rdcc_w0); /* Check arguments */ if (H5P_FILE_ACCESS!=H5P_get_class (plist_id) || @@ -1970,6 +1979,7 @@ H5Pget_cache(hid_t plist_id, int *mdc_nelmts, size_t *rdcc_nbytes, /* Get sizes */ if (mdc_nelmts) *mdc_nelmts = fapl->mdc_nelmts; + if (rdcc_nelmts) *rdcc_nelmts = fapl->rdcc_nelmts; if (rdcc_nbytes) *rdcc_nbytes = fapl->rdcc_nbytes; if (rdcc_w0) *rdcc_w0 = fapl->rdcc_w0; @@ -2103,6 +2113,7 @@ H5Pset_hyper_cache(hid_t plist_id, unsigned cache, unsigned limit) H5D_xfer_t *plist = NULL; FUNC_ENTER (H5Pset_hyper_cache, FAIL); + H5TRACE3("e","iIuIu",plist_id,cache,limit); /* Check arguments */ if (H5P_DATASET_XFER != H5P_get_class (plist_id) || @@ -2141,6 +2152,7 @@ H5Pget_hyper_cache(hid_t plist_id, unsigned *cache/*out*/, unsigned *limit/*out* H5D_xfer_t *plist = NULL; FUNC_ENTER (H5Pget_hyper_cache, 0); + H5TRACE3("e","ixx",plist_id,cache,limit); /* Check arguments */ if (H5P_DATASET_XFER != H5P_get_class (plist_id) || diff --git a/src/H5Ppublic.h b/src/H5Ppublic.h index d82bb83..123ec41 100644 --- a/src/H5Ppublic.h +++ b/src/H5Ppublic.h @@ -101,9 +101,10 @@ H5Z_filter_t H5Pget_filter(hid_t plist_id, int filter, unsigned cd_values[]/*out*/, size_t namelen, char name[]); herr_t H5Pset_deflate (hid_t plist_id, unsigned aggression); -herr_t H5Pset_cache (hid_t plist_id, int mdc_nelmts, size_t rdcc_nbytes, - double rdcc_w0); -herr_t H5Pget_cache (hid_t plist_id, int *mdc_nelmts, size_t *rdcc_nbytes, +herr_t H5Pset_cache (hid_t plist_id, int mdc_nelmts, int rdcc_nelmts, + size_t rdcc_nbytes, double rdcc_w0); +herr_t H5Pget_cache (hid_t plist_id, int *mdc_nelmts/*out*/, + int *rdcc_nelmts/*out*/, size_t *rdcc_nbytes/*out*/, double *rdcc_w0); herr_t H5Pset_hyper_cache(hid_t plist_id, unsigned cache, unsigned limit); herr_t H5Pget_hyper_cache(hid_t plist_id, unsigned *cache, unsigned *limit); |
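To round out the API change, here is a hedged usage sketch against the new H5Pset_cache()/H5Pget_cache() signatures declared above. The prototypes and the defaults (521 slots, 1 MB, w0 = 0.75) come from the patch itself; the hdf5.h include and the enlarged cache sizes chosen below are assumptions made only for the example.

#include <hdf5.h>
#include <stdio.h>

int
main(void)
{
    hid_t  fapl = H5Pcreate(H5P_FILE_ACCESS);  /* new file-access property list */
    int    mdc_nelmts, rdcc_nelmts;
    size_t rdcc_nbytes;
    double rdcc_w0;

    /* Read back the current settings (per this commit the raw data chunk
     * cache defaults to 521 slots, 1 MB, and w0 = 0.75). */
    if (H5Pget_cache(fapl, &mdc_nelmts, &rdcc_nelmts, &rdcc_nbytes, &rdcc_w0) < 0) {
        fprintf(stderr, "H5Pget_cache failed\n");
        return 1;
    }

    /* Enlarge the raw data chunk cache to 1021 hash slots and 4 MB while
     * keeping the meta data cache size and the w0 preemption policy. */
    if (H5Pset_cache(fapl, mdc_nelmts, 1021, (size_t)4 * 1024 * 1024, 0.75) < 0) {
        fprintf(stderr, "H5Pset_cache failed\n");
        return 1;
    }

    /* The property list would then be passed to H5Fcreate()/H5Fopen(). */
    H5Pclose(fapl);
    return 0;
}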