From 580b2747db63dc2735832fdec6c35f8c3344836d Mon Sep 17 00:00:00 2001 From: Dana Robinson <43805+derobins@users.noreply.github.com> Date: Tue, 25 Apr 2023 10:03:57 -0700 Subject: Sanitize H5HG cache deserialization code (#2808) --- src/H5HGcache.c | 291 +++++++++++++++++++++++++++++--------------------------- src/H5HGpkg.h | 35 ++++--- 2 files changed, 173 insertions(+), 153 deletions(-) diff --git a/src/H5HGcache.c b/src/H5HGcache.c index 235a990..bbfae7c 100644 --- a/src/H5HGcache.c +++ b/src/H5HGcache.c @@ -13,10 +13,8 @@ /*------------------------------------------------------------------------- * * Created: H5HGcache.c - * Feb 5 2008 - * Quincey Koziol * - * Purpose: Implement global heap metadata cache methods. + * Purpose: Implement global heap metadata cache methods * *------------------------------------------------------------------------- */ @@ -30,12 +28,12 @@ /***********/ /* Headers */ /***********/ -#include "H5private.h" /* Generic Functions */ -#include "H5Eprivate.h" /* Error handling */ -#include "H5Fprivate.h" /* File access */ -#include "H5HGpkg.h" /* Global heaps */ -#include "H5MFprivate.h" /* File memory management */ -#include "H5MMprivate.h" /* Memory management */ +#include "H5private.h" /* Generic Functions */ +#include "H5Eprivate.h" /* Error handling */ +#include "H5Fprivate.h" /* File access */ +#include "H5HGpkg.h" /* Global heaps */ +#include "H5MFprivate.h" /* File memory management */ +#include "H5MMprivate.h" /* Memory management */ /****************/ /* Local Macros */ @@ -63,7 +61,7 @@ static herr_t H5HG__cache_heap_serialize(const H5F_t *f, void *image, size_t len static herr_t H5HG__cache_heap_free_icr(void *thing); /* Prefix deserialization */ -static herr_t H5HG__hdr_deserialize(H5HG_heap_t *heap, const uint8_t *image, const H5F_t *f); +static herr_t H5HG__hdr_deserialize(H5HG_heap_t *heap, const uint8_t *image, size_t len, const H5F_t *f); /*********************/ /* Package Variables */ @@ -96,65 +94,64 @@ const H5AC_class_t H5AC_GHEAP[1] = {{ /*******************/ /*------------------------------------------------------------------------- - * Function: H5HG__hdr_deserialize() + * Function: H5HG__hdr_deserialize * - * Purpose: Decode a global heap's header - * - * Return: Success: SUCCEED - * Failure: FAIL - * - * Programmer: Quincey Koziol - * December 15, 2016 + * Purpose: Decode a global heap's header * + * Return: SUCCEED/FAIL *------------------------------------------------------------------------- */ static herr_t -H5HG__hdr_deserialize(H5HG_heap_t *heap, const uint8_t *image, const H5F_t *f) +H5HG__hdr_deserialize(H5HG_heap_t *heap, const uint8_t *image, size_t len, const H5F_t *f) { - herr_t ret_value = SUCCEED; /* Return value */ + const uint8_t *p_end = image + len - 1; /* End of image buffer */ + herr_t ret_value = SUCCEED; FUNC_ENTER_PACKAGE - /* Sanity check */ HDassert(heap); HDassert(image); HDassert(f); /* Magic number */ + if (H5_IS_BUFFER_OVERFLOW(image, H5_SIZEOF_MAGIC, p_end)) + HGOTO_ERROR(H5E_HEAP, H5E_OVERFLOW, FAIL, "ran off end of input buffer while decoding"); if (HDmemcmp(image, H5HG_MAGIC, (size_t)H5_SIZEOF_MAGIC) != 0) HGOTO_ERROR(H5E_HEAP, H5E_BADVALUE, FAIL, "bad global heap collection signature") image += H5_SIZEOF_MAGIC; /* Version */ + if (H5_IS_BUFFER_OVERFLOW(image, 1, p_end)) + HGOTO_ERROR(H5E_HEAP, H5E_OVERFLOW, FAIL, "ran off end of input buffer while decoding"); if (H5HG_VERSION != *image++) HGOTO_ERROR(H5E_HEAP, H5E_VERSION, FAIL, "wrong version number in global heap") /* Reserved */ + if (H5_IS_BUFFER_OVERFLOW(image, 3, p_end)) + HGOTO_ERROR(H5E_HEAP, H5E_OVERFLOW, FAIL, "ran off end of input buffer while decoding"); image += 3; /* Size */ + if (H5_IS_BUFFER_OVERFLOW(image, H5F_sizeof_size(f), p_end)) + HGOTO_ERROR(H5E_HEAP, H5E_OVERFLOW, FAIL, "ran off end of input buffer while decoding"); H5F_DECODE_LENGTH(f, image, heap->size); - HDassert(heap->size >= H5HG_MINSIZE); + if (heap->size < H5HG_MINSIZE) + HGOTO_ERROR(H5E_HEAP, H5E_BADVALUE, FAIL, "global heap size is too small"); done: FUNC_LEAVE_NOAPI(ret_value) } /* end H5HG__hdr_deserialize() */ /*------------------------------------------------------------------------- - * Function: H5HG__cache_heap_get_initial_load_size() - * - * Purpose: Return the initial speculative read size to the metadata - * cache. This size will be used in the initial attempt to read - * the global heap. If this read is too small, the cache will - * try again with the correct value obtained from - * H5HG__cache_get_final_load_size(). + * Function: H5HG__cache_heap_get_initial_load_size * - * Return: Success: SUCCEED - * Failure: FAIL - * - * Programmer: John Mainzer - * 7/27/14 + * Purpose: Return the initial speculative read size to the metadata + * cache. This size will be used in the initial attempt to read + * the global heap. If this read is too small, the cache will + * try again with the correct value obtained from + * H5HG__cache_get_final_load_size(). * + * Return: SUCCEED/FAIL *------------------------------------------------------------------------- */ static herr_t @@ -162,39 +159,30 @@ H5HG__cache_heap_get_initial_load_size(void H5_ATTR_UNUSED *_udata, size_t *imag { FUNC_ENTER_PACKAGE_NOERR - /* Sanity check */ HDassert(image_len); - /* Set the image length size */ - *image_len = (size_t)H5HG_MINSIZE; + *image_len = H5HG_MINSIZE; FUNC_LEAVE_NOAPI(SUCCEED) } /* end H5HG__cache_heap_get_initial_load_size() */ /*------------------------------------------------------------------------- - * Function: H5HG__cache_heap_get_initial_load_size() - * - * Purpose: Return the final read size for a speculatively ready heap to - * the metadata cache. + * Function: H5HG__cache_heap_get_final_load_size * - * Return: Success: SUCCEED - * Failure: FAIL - * - * Programmer: Quincey Koziol - * November 18, 2016 + * Purpose: Return the final read size for a speculatively ready heap to + * the metadata cache. * + * Return: SUCCEED/FAIL *------------------------------------------------------------------------- */ static herr_t -H5HG__cache_heap_get_final_load_size(const void *image, size_t H5_ATTR_NDEBUG_UNUSED image_len, void *udata, - size_t *actual_len) +H5HG__cache_heap_get_final_load_size(const void *image, size_t image_len, void *udata, size_t *actual_len) { - H5HG_heap_t heap; /* Global heap */ - herr_t ret_value = SUCCEED; /* Return value */ + H5HG_heap_t heap; + herr_t ret_value = SUCCEED; FUNC_ENTER_PACKAGE - /* Sanity check */ HDassert(image); HDassert(udata); HDassert(actual_len); @@ -202,10 +190,10 @@ H5HG__cache_heap_get_final_load_size(const void *image, size_t H5_ATTR_NDEBUG_UN HDassert(image_len == H5HG_MINSIZE); /* Deserialize the heap's header */ - if (H5HG__hdr_deserialize(&heap, (const uint8_t *)image, (const H5F_t *)udata) < 0) + if (H5HG__hdr_deserialize(&heap, (const uint8_t *)image, image_len, (const H5F_t *)udata) < 0) HGOTO_ERROR(H5E_HEAP, H5E_CANTDECODE, FAIL, "can't decode global heap prefix") - /* Set the final size for the cache image */ + /* Set the actual global heap size */ *actual_len = heap.size; done: @@ -215,31 +203,28 @@ done: /*------------------------------------------------------------------------- * Function: H5HG__cache_heap_deserialize * - * Purpose: Given a buffer containing the on disk image of the global - * heap, deserialize it, load its contents into a newly allocated - * instance of H5HG_heap_t, and return a pointer to the new instance. - * - * Return: Success: Pointer to in core representation - * Failure: NULL - * - * Programmer: John Mainzer - * 7/27/14 + * Purpose: Given a buffer containing the on disk image of the global + * heap, deserialize it, load its contents into a newly allocated + * instance of H5HG_heap_t, and return a pointer to the new + * instance. * + * Return: Success: Pointer to a new global heap + * Failure: NULL *------------------------------------------------------------------------- */ static void * H5HG__cache_heap_deserialize(const void *_image, size_t len, void *_udata, hbool_t H5_ATTR_UNUSED *dirty) { - H5F_t *f = (H5F_t *)_udata; /* File pointer -- obtained from user data */ - H5HG_heap_t *heap = NULL; /* New global heap */ - uint8_t *image; /* Pointer to image to decode */ - size_t max_idx = 0; /* Maximum heap object index seen */ - size_t nalloc; /* Number of objects allocated */ - void *ret_value = NULL; /* Return value */ + H5F_t *f = (H5F_t *)_udata; /* File pointer */ + H5HG_heap_t *heap = NULL; /* New global heap */ + uint8_t *p = NULL; /* Pointer to objects in (copied) image buffer */ + const uint8_t *p_end = NULL; /* End of (copied) image buffer */ + size_t max_idx = 0; /* Maximum heap object index seen */ + size_t nalloc = 0; /* Number of objects allocated */ + void *ret_value = NULL; FUNC_ENTER_PACKAGE - /* Sanity checks */ HDassert(_image); HDassert(len >= (size_t)H5HG_MINSIZE); HDassert(f); @@ -252,15 +237,28 @@ H5HG__cache_heap_deserialize(const void *_image, size_t len, void *_udata, hbool if (NULL == (heap->chunk = H5FL_BLK_MALLOC(gheap_chunk, len))) HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed") - /* Copy the image buffer into the newly allocate chunk */ + /* Copy the image buffer into the newly allocated chunk */ H5MM_memcpy(heap->chunk, _image, len); + /* Set p_end + * + * Note that parsing moves along p / heap->chunk, so p_end + * has to refer to the end of that buffer and NOT _image + */ + p_end = heap->chunk + len - 1; + /* Deserialize the heap's header */ - if (H5HG__hdr_deserialize(heap, (const uint8_t *)heap->chunk, f) < 0) + if (H5_IS_BUFFER_OVERFLOW(heap->chunk, H5HG_SIZEOF_HDR(f), p_end)) + HGOTO_ERROR(H5E_HEAP, H5E_OVERFLOW, NULL, "ran off end of input buffer while decoding"); + if (H5HG__hdr_deserialize(heap, (const uint8_t *)heap->chunk, len, f) < 0) HGOTO_ERROR(H5E_HEAP, H5E_CANTDECODE, NULL, "can't decode global heap header") /* Decode each object */ - image = heap->chunk + H5HG_SIZEOF_HDR(f); + + /* Set the p pointer to the objects in heap->chunk */ + p = heap->chunk + H5HG_SIZEOF_HDR(f); + + /* Set the number of allocated objects */ nalloc = H5HG_NOBJS(f, heap->size); /* Calloc the obj array because the file format spec makes no guarantee @@ -270,32 +268,43 @@ H5HG__cache_heap_deserialize(const void *_image, size_t len, void *_udata, hbool HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, "memory allocation failed") heap->nalloc = nalloc; - while (image < (heap->chunk + heap->size)) { - if ((image + H5HG_SIZEOF_OBJHDR(f)) > (heap->chunk + heap->size)) { - /* - * The last bit of space is too tiny for an object header, so + while (p < (heap->chunk + heap->size)) { + + if ((p + H5HG_SIZEOF_OBJHDR(f)) > (heap->chunk + heap->size)) { + + /* The last bit of space is too tiny for an object header, so * we assume that it's free space. */ - HDassert(NULL == heap->obj[0].begin); - heap->obj[0].size = (size_t)(((const uint8_t *)heap->chunk + heap->size) - image); - heap->obj[0].begin = image; - image += heap->obj[0].size; - } /* end if */ + if (NULL != heap->obj[0].begin) + HGOTO_ERROR(H5E_HEAP, H5E_BADVALUE, NULL, "object 0 should not be set"); + heap->obj[0].size = (size_t)(((const uint8_t *)heap->chunk + heap->size) - p); + heap->obj[0].begin = p; + + /* No buffer overflow check here since this just moves the pointer + * to the end of the buffer, which was calculated above + */ + p += heap->obj[0].size; + } else { - size_t need = 0; - unsigned idx; - uint8_t *begin = image; + size_t need = 0; /* # bytes needed to store the object */ + unsigned idx; /* Heap object index */ + uint8_t *begin = p; /* Pointer to start of object */ - UINT16DECODE(image, idx); + /* Parse a normal heap entry */ + + if (H5_IS_BUFFER_OVERFLOW(p, 2, p_end)) + HGOTO_ERROR(H5E_HEAP, H5E_OVERFLOW, NULL, "ran off end of input buffer while decoding"); + UINT16DECODE(p, idx); /* Check if we need more room to store heap objects */ if (idx >= heap->nalloc) { size_t new_alloc; /* New allocation number */ - H5HG_obj_t *new_obj; /* New array of object descriptions */ + H5HG_obj_t *new_obj; /* New array of object descriptions */ /* Determine the new number of objects to index */ new_alloc = MAX(heap->nalloc * 2, (idx + 1)); - HDassert(idx < new_alloc); + if (idx >= new_alloc) + HGOTO_ERROR(H5E_HEAP, H5E_BADVALUE, NULL, "inappropriate heap index") /* Reallocate array of objects */ if (NULL == (new_obj = H5FL_SEQ_REALLOC(H5HG_obj_t, heap->obj, new_alloc))) @@ -307,16 +316,32 @@ H5HG__cache_heap_deserialize(const void *_image, size_t len, void *_udata, hbool /* Update heap information */ heap->nalloc = new_alloc; heap->obj = new_obj; - HDassert(heap->nalloc > heap->nused); - } /* end if */ - - UINT16DECODE(image, heap->obj[idx].nrefs); - image += 4; /*reserved*/ - H5F_DECODE_LENGTH(f, image, heap->obj[idx].size); + if (heap->nalloc <= heap->nused) + HGOTO_ERROR(H5E_HEAP, H5E_BADVALUE, NULL, "inappropriate # allocated slots") + } + + /* Number of references */ + if (H5_IS_BUFFER_OVERFLOW(p, 2, p_end)) + HGOTO_ERROR(H5E_HEAP, H5E_OVERFLOW, NULL, "ran off end of input buffer while decoding"); + UINT16DECODE(p, heap->obj[idx].nrefs); + + /* Reserved bytes */ + if (H5_IS_BUFFER_OVERFLOW(p, 4, p_end)) + HGOTO_ERROR(H5E_HEAP, H5E_OVERFLOW, NULL, "ran off end of input buffer while decoding"); + p += 4; + + /* Object length */ + if (H5_IS_BUFFER_OVERFLOW(p, H5F_sizeof_size(f), p_end)) + HGOTO_ERROR(H5E_HEAP, H5E_OVERFLOW, NULL, "ran off end of input buffer while decoding"); + H5F_DECODE_LENGTH(f, p, heap->obj[idx].size); + + /* Object + * + * Points to beginning of object, INCLUDING the header. + */ heap->obj[idx].begin = begin; - /* - * The total storage size includes the size of the object + /* The total storage size includes the size of the object * header and is zero padded so the next object header is * properly aligned. The entire obj array was calloc'ed, * so no need to zero the space here. The last bit of space @@ -327,26 +352,33 @@ H5HG__cache_heap_deserialize(const void *_image, size_t len, void *_udata, hbool need = H5HG_SIZEOF_OBJHDR(f) + H5HG_ALIGN(heap->obj[idx].size); if (idx > max_idx) max_idx = idx; - } /* end if */ + } else need = heap->obj[idx].size; - image = begin + need; - } /* end else */ - } /* end while */ - - /* Sanity checks */ - HDassert(image == heap->chunk + heap->size); - HDassert(H5HG_ISALIGNED(heap->obj[0].size)); - - /* Set the next index value to use */ + /* Make sure the extra padding doesn't cause us to overrun + * the buffer + */ + if (H5_IS_BUFFER_OVERFLOW(begin, need, p_end)) + HGOTO_ERROR(H5E_HEAP, H5E_OVERFLOW, NULL, "ran off end of input buffer while decoding"); + p = begin + need; + } + } + + /* Post-parse checks */ + if (p != heap->chunk + heap->size) + HGOTO_ERROR(H5E_HEAP, H5E_BADVALUE, NULL, "partially decoded global heap"); + if (FALSE == H5HG_ISALIGNED(heap->obj[0].size)) + HGOTO_ERROR(H5E_HEAP, H5E_BADVALUE, NULL, "decoded global heap is not aligned"); + + /* Set the next index value to use when creating a new object */ if (max_idx > 0) heap->nused = max_idx + 1; else heap->nused = 1; - /* Sanity check */ - HDassert(max_idx < heap->nused); + if (max_idx >= heap->nused) + HGOTO_ERROR(H5E_HEAP, H5E_BADVALUE, NULL, "bad `next unused` heap index value"); /* Add the new heap to the CWFS list for the file */ if (H5F_cwfs_add(f, heap) < 0) @@ -365,15 +397,10 @@ done: /*------------------------------------------------------------------------- * Function: H5HG__cache_heap_image_len * - * Purpose: Return the on disk image size of the global heap to the - * metadata cache via the image_len. - * - * Return: Success: SUCCEED - * Failure: FAIL - * - * Programmer: John Mainzer - * 7/27/14 + * Purpose: Return the on disk image size of the global heap to the + * metadata cache via the image_len. * + * Return: SUCCEED/FAIL *------------------------------------------------------------------------- */ static herr_t @@ -383,7 +410,6 @@ H5HG__cache_heap_image_len(const void *_thing, size_t *image_len) FUNC_ENTER_PACKAGE_NOERR - /* Sanity checks */ HDassert(heap); HDassert(heap->cache_info.magic == H5C__H5C_CACHE_ENTRY_T_MAGIC); HDassert(heap->cache_info.type == H5AC_GHEAP); @@ -398,17 +424,12 @@ H5HG__cache_heap_image_len(const void *_thing, size_t *image_len) /*------------------------------------------------------------------------- * Function: H5HG__cache_heap_serialize * - * Purpose: Given an appropriately sized buffer and an instance of - * H5HG_heap_t, serialize the global heap for writing to file, - * and copy the serialized version into the buffer. - * + * Purpose: Given an appropriately sized buffer and an instance of + * H5HG_heap_t, serialize the global heap for writing to file, + * and copy the serialized version into the buffer. * - * Return: Success: SUCCEED - * Failure: FAIL - * - * Programmer: John Mainzer - * 7/27/14 * + * Return: SUCCEED/FAIL *------------------------------------------------------------------------- */ static herr_t @@ -426,7 +447,7 @@ H5HG__cache_heap_serialize(const H5F_t H5_ATTR_NDEBUG_UNUSED *f, void *image, si HDassert(heap->size == len); HDassert(heap->chunk); - /* copy the image into the buffer */ + /* Copy the image into the buffer */ H5MM_memcpy(image, heap->chunk, len); FUNC_LEAVE_NOAPI(SUCCEED) @@ -435,29 +456,23 @@ H5HG__cache_heap_serialize(const H5F_t H5_ATTR_NDEBUG_UNUSED *f, void *image, si /*------------------------------------------------------------------------- * Function: H5HG__cache_heap_free_icr * - * Purpose: Free the in memory representation of the supplied global heap. - * - * Note: The metadata cache sets the object's cache_info.magic to - * H5C__H5C_CACHE_ENTRY_T_BAD_MAGIC before calling a free_icr - * callback (checked in assert). - * - * Return: Success: SUCCEED - * Failure: FAIL + * Purpose: Free the in memory representation of the supplied global heap. * - * Programmer: John Mainzer - * 7/27/14 + * Note: The metadata cache sets the object's cache_info.magic to + * H5C__H5C_CACHE_ENTRY_T_BAD_MAGIC before calling a free_icr + * callback (checked in assert). * + * Return: SUCCEED/FAIL *------------------------------------------------------------------------- */ static herr_t H5HG__cache_heap_free_icr(void *_thing) { H5HG_heap_t *heap = (H5HG_heap_t *)_thing; - herr_t ret_value = SUCCEED; /* Return value */ + herr_t ret_value = SUCCEED; FUNC_ENTER_PACKAGE - /* Sanity checks */ HDassert(heap); HDassert(heap->cache_info.magic == H5C__H5C_CACHE_ENTRY_T_BAD_MAGIC); HDassert(heap->cache_info.type == H5AC_GHEAP); diff --git a/src/H5HGpkg.h b/src/H5HGpkg.h index ab7cd09..99725d8 100644 --- a/src/H5HGpkg.h +++ b/src/H5HGpkg.h @@ -103,27 +103,32 @@ H5FL_BLK_EXTERN(gheap_chunk); /****************************/ typedef struct H5HG_obj_t { - int nrefs; /* reference count */ - size_t size; /* total size of object */ - uint8_t *begin; /* ptr to object into heap->chunk */ + int nrefs; /* Reference count */ + size_t size; /* Total size of object */ + uint8_t *begin; /* Pointer to object into heap->chunk (INCLUDES header) */ } H5HG_obj_t; /* Forward declarations for fields */ struct H5F_shared_t; struct H5HG_heap_t { - H5AC_info_t cache_info; /* Information for H5AC cache functions, _must_ be */ - /* first field in structure */ - haddr_t addr; /*collection address */ - size_t size; /*total size of collection */ - uint8_t *chunk; /*the collection, incl. header */ - size_t nalloc; /*numb object slots allocated */ - size_t nused; /*number of slots used */ - /* If this value is >65535 then all indices */ - /* have been used at some time and the */ - /* correct new index should be searched for */ - struct H5F_shared_t *shared; /* shared file */ - H5HG_obj_t *obj; /*array of object descriptions */ + H5AC_info_t cache_info; /* Information for H5AC cache functions, MUST be + * the first field in structure + */ + haddr_t addr; /* Collection address */ + size_t size; /* Total size of collection */ + uint8_t *chunk; /* Collection of elements - note that this + * INCLUDES the header, so it's not just + * the objects! + */ + size_t nalloc; /* # object slots allocated */ + size_t nused; /* # of slots used + * If this value is >65535 then all indices + * have been used at some time and the + * correct new index should be searched for + */ + struct H5F_shared_t *shared; /* Shared file */ + H5HG_obj_t *obj; /* Array of object descriptions */ }; /******************************/ -- cgit v0.12