From 53a9be37483c91015e4f3060490bec4a3a1495c2 Mon Sep 17 00:00:00 2001 From: Quincey Koziol Date: Sat, 12 Jun 2004 20:00:24 -0500 Subject: [svn-r8666] Purpose: Code optimization Description: Restructure conversion loop of variable-length objects to avoid walking through memory backwards and allocating as many temporary buffers. (This uses the optimized method used in the atomic type conversions) Platforms tested: Solaris 2.7 (arabica) FreeBSD 4.10 (sleipnir) w/parallel Too minor to require h5committest --- src/H5Tconv.c | 309 ++++++++++++++++++++++++++----------------------------- src/H5Tprivate.h | 2 +- src/H5Tvlen.c | 1 - 3 files changed, 147 insertions(+), 165 deletions(-) diff --git a/src/H5Tconv.c b/src/H5Tconv.c index 89b33de..929ddbc 100644 --- a/src/H5Tconv.c +++ b/src/H5Tconv.c @@ -2420,20 +2420,21 @@ done: */ herr_t H5T_conv_vlen(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, hsize_t nelmts, - size_t buf_stride, size_t bkg_stride, void *_buf, - void *_bkg, hid_t dxpl_id) + size_t buf_stride, size_t bkg_stride, void *buf, + void *bkg, hid_t dxpl_id) { H5T_vlen_alloc_info_t vl_alloc_info;/* VL allocation information */ H5T_path_t *tpath; /* Type conversion path */ hid_t tsrc_id = -1, tdst_id = -1;/*temporary type atoms */ H5T_t *src = NULL; /*source data type */ H5T_t *dst = NULL; /*destination data type */ - hsize_t olap; /*num overlapping elements */ - uint8_t *s, *sp, *d, *dp; /*source and dest traversal ptrs */ - uint8_t **dptr; /*pointer to correct destination pointer*/ - uint8_t *bg_ptr=NULL; /*background buf traversal pointer */ H5HG_t bg_hobjid, parent_hobjid; - size_t src_delta, dst_delta, bkg_delta;/*source & destination stride*/ + uint8_t *s; /*source buffer */ + uint8_t *d; /*destination buffer */ + uint8_t *b; /*background buffer */ + ssize_t s_stride, d_stride; /*src and dst strides */ + ssize_t b_stride; /*bkg stride */ + size_t safe; /*how many elements are safe to process in each pass */ hssize_t seq_len; /*the number of elements in the current sequence*/ hsize_t bg_seq_len=0, parent_seq_len=0; size_t src_base_size, dst_base_size;/*source & destination base size*/ @@ -2442,8 +2443,6 @@ H5T_conv_vlen(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, hsize_t nelmts, size_t conv_buf_size=0; /*size of conversion buffer in bytes */ void *tmp_buf=NULL; /*temporary background buffer */ size_t tmp_buf_size=0; /*size of temporary bkg buffer */ - void *dbuf=NULL; /*temp destination buffer */ - int direction; /*direction of traversal */ int nested=0; /*flag of nested VL case */ hsize_t elmtno; /*element number counter */ hsize_t i; @@ -2483,57 +2482,23 @@ H5T_conv_vlen(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, hsize_t nelmts, NULL == (dst = H5I_object(dst_id))) HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a data type"); - /* - * Do we process the values from beginning to end or vice - * versa? Also, how many of the elements have the source and - * destination areas overlapping? - */ - if (src->size==dst->size || buf_stride>0) { - olap = nelmts; - sp = dp = (uint8_t*)_buf; - bg_ptr = (uint8_t*)_bkg; - direction = 1; - } else if (src->size>=dst->size) { - /* potentially this uses the destination buffer 1 extra - * time, but its faster that floating-point calcs */ - olap = ((dst->size)/(src->size-dst->size))+1; - sp = dp = (uint8_t*)_buf; - bg_ptr = (uint8_t*)_bkg; - direction = 1; - } else { - /* potentially this uses the destination buffer 1 extra - * time, but its faster that floating-point calcs */ - olap = nelmts-(((src->size)/(dst->size-src->size))+1); - sp = (uint8_t*)_buf + (nelmts-1) * - (buf_stride ? buf_stride : src->size); - dp = (uint8_t*)_buf + (nelmts-1) * - (buf_stride ? buf_stride : dst->size); - if(_bkg!=NULL) - bg_ptr = (uint8_t*)_bkg + (nelmts-1) * - (bkg_stride ? bkg_stride : dst->size); - direction = -1; - } - - /* - * Direction & size of buffer traversal. - */ - src_delta = direction * (buf_stride ? buf_stride : src->size); - dst_delta = direction * (buf_stride ? buf_stride : dst->size); - bkg_delta = direction * (bkg_stride ? bkg_stride : dst->size); - - /* Dynamically allocate the destination buffer */ - if ((dbuf=H5FL_BLK_MALLOC(vlen_seq,dst->size))==NULL) - HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for type conversion"); - - /* - * If the source and destination buffers overlap then use a - * temporary buffer for the destination. - */ - if (direction>0) { - dptr = (uint8_t **)&dbuf; + /* Initialize source & destination strides */ + if (buf_stride) { + assert(buf_stride>=src->size); + assert(buf_stride>=dst->size); + s_stride = d_stride = buf_stride; } else { - dptr = &dp; + s_stride = src->size; + d_stride = dst->size; } + if(bkg) { + if(bkg_stride) + b_stride=bkg_stride; + else + b_stride=d_stride; + } /* end if */ + else + b_stride=0; /* Get the size of the base types in src & dst */ src_base_size=H5T_get_size(src->parent); @@ -2561,121 +2526,142 @@ H5T_conv_vlen(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, hsize_t nelmts, HGOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "unable to retrieve VL allocation info"); /* Set the flag for nested VL case */ - if(dst->u.vlen.f!=NULL && H5T_detect_class(dst->parent,H5T_VLEN) && bg_ptr!=NULL) + if(dst->u.vlen.f!=NULL && H5T_detect_class(dst->parent,H5T_VLEN) && bkg!=NULL) nested=1; - for (elmtno=0; elmtnou.vlen.isnull))(src->u.vlen.f,s)) { - /* Write "nil" sequence to destination location */ - if((*(dst->u.vlen.setnull))(dst->u.vlen.f,dxpl_id,d,bg_ptr)<0) - HGOTO_ERROR(H5E_DATATYPE, H5E_WRITEERROR, FAIL, "can't set VL data to 'nil'"); + /* The outer loop of the type conversion macro, controlling which */ + /* direction the buffer is walked */ + while (nelmts>0) { + /* Check if we need to go backwards through the buffer */ + if(d_stride>s_stride) { + /* Compute the number of "safe" destination elements at */ + /* the end of the buffer (Those which don't overlap with */ + /* any source elements at the beginning of the buffer) */ + safe=nelmts-(((nelmts*s_stride)+(d_stride-1))/d_stride); + + /* If we're down to the last few elements, just wrap up */ + /* with a "real" reverse copy */ + if(safe<2) { + s = (uint8_t*)buf+(nelmts-1)*s_stride; + d = (uint8_t*)buf+(nelmts-1)*d_stride; + b = (uint8_t*)bkg+(nelmts-1)*b_stride; + s_stride = -s_stride; + d_stride = -d_stride; + b_stride = -b_stride; + + safe=nelmts; + } /* end if */ + else { + s = (uint8_t*)buf+(nelmts-safe)*s_stride; + d = (uint8_t*)buf+(nelmts-safe)*d_stride; + b = (uint8_t*)bkg+(nelmts-safe)*b_stride; + } /* end else */ } /* end if */ else { - /* Get length of element sequences */ - if((seq_len=(*(src->u.vlen.getlen))(s))<0) - HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "incorrect length"); - H5_CHECK_OVERFLOW(seq_len,hssize_t,size_t); - src_size=(size_t)seq_len*src_base_size; - dst_size=(size_t)seq_len*dst_base_size; - - /* Check if conversion buffer is large enough, resize if - * necessary */ - if(conv_buf_sizeu.vlen.read))(src->u.vlen.f,dxpl_id,s,conv_buf,src_size)<0) - HGOTO_ERROR(H5E_DATATYPE, H5E_READERROR, FAIL, "can't read VL data"); - - /* Check if temporary buffer is large enough, resize if necessary */ - /* (Chain off the conversion buffer size) */ - if((tpath->cdata.need_bkg || H5T_detect_class(dst->parent, H5T_VLEN)) - && tmp_buf_sizeu.vlen.isnull))(src->u.vlen.f,s)) { + /* Write "nil" sequence to destination location */ + if((*(dst->u.vlen.setnull))(dst->u.vlen.f,dxpl_id,d,b)<0) + HGOTO_ERROR(H5E_DATATYPE, H5E_WRITEERROR, FAIL, "can't set VL data to 'nil'"); } /* end if */ + else { + /* Get length of element sequences */ + if((seq_len=(*(src->u.vlen.getlen))(s))<0) + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "incorrect length"); + H5_CHECK_OVERFLOW(seq_len,hssize_t,size_t); + src_size=(size_t)seq_len*src_base_size; + dst_size=(size_t)seq_len*dst_base_size; + + /* Check if conversion buffer is large enough, resize if + * necessary */ + if(conv_buf_size0) { - H5_CHECK_OVERFLOW( bg_seq_len*MAX(src_base_size,dst_base_size) ,hsize_t,size_t); - if(tmp_buf_size<(size_t)(bg_seq_len*MAX(src_base_size, dst_base_size))) { - tmp_buf_size=(size_t)(bg_seq_len*MAX(src_base_size, dst_base_size)); - if((tmp_buf=H5FL_BLK_REALLOC(vlen_seq,tmp_buf, tmp_buf_size))==NULL) - HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for type conversion"); - } - H5F_addr_decode(dst->u.vlen.f, (const uint8_t **)&tmp, &(bg_hobjid.addr)); - INT32DECODE(tmp, bg_hobjid.idx); - if(H5HG_read(dst->u.vlen.f,dxpl_id,&bg_hobjid,tmp_buf)==NULL) - HGOTO_ERROR (H5E_DATATYPE, H5E_READERROR, FAIL, "can't read VL sequence into background buffer"); + /* Read in VL sequence */ + if((*(src->u.vlen.read))(src->u.vlen.f,dxpl_id,s,conv_buf,src_size)<0) + HGOTO_ERROR(H5E_DATATYPE, H5E_READERROR, FAIL, "can't read VL data"); + + /* Check if temporary buffer is large enough, resize if necessary */ + /* (Chain off the conversion buffer size) */ + if(tmp_buf && tmp_buf_size0) { + H5_CHECK_OVERFLOW( bg_seq_len*MAX(src_base_size,dst_base_size) ,hsize_t,size_t); + if(tmp_buf_size<(size_t)(bg_seq_len*MAX(src_base_size, dst_base_size))) { + tmp_buf_size=(size_t)(bg_seq_len*MAX(src_base_size, dst_base_size)); + if((tmp_buf=H5FL_BLK_REALLOC(vlen_seq,tmp_buf, tmp_buf_size))==NULL) + HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for type conversion"); + } + H5F_addr_decode(dst->u.vlen.f, (const uint8_t **)&tmp, &(bg_hobjid.addr)); + INT32DECODE(tmp, bg_hobjid.idx); + if(H5HG_read(dst->u.vlen.f,dxpl_id,&bg_hobjid,tmp_buf)==NULL) + HGOTO_ERROR (H5E_DATATYPE, H5E_READERROR, FAIL, "can't read VL sequence into background buffer"); + } /* end if */ + + /* If the sequence gets shorter, pad out the original sequence with zeros */ + H5_CHECK_OVERFLOW(bg_seq_len,hsize_t,hssize_t); + if((hssize_t)bg_seq_lenu.vlen.write))(dst->u.vlen.f,dxpl_id,&vl_alloc_info,d,conv_buf, bg_ptr, (hsize_t)seq_len,(hsize_t)dst_base_size)<0) - HGOTO_ERROR(H5E_DATATYPE, H5E_WRITEERROR, FAIL, "can't write VL data"); - - /* For nested VL case, free leftover heap objects from the deeper level if the length of new data elements is shorted than the old data elements.*/ - H5_CHECK_OVERFLOW(bg_seq_len,hsize_t,hssize_t); - if(nested && seq_len<(hssize_t)bg_seq_len) { - uint8_t *tmp_p=tmp_buf; - tmp_p += seq_len*dst_base_size; - for(i=0; i<(bg_seq_len-seq_len); i++) { - UINT32DECODE(tmp_p, parent_seq_len); - if(parent_seq_len>0) { - H5F_addr_decode(dst->u.vlen.f, (const uint8_t **)&tmp_p, &(parent_hobjid.addr)); - INT32DECODE(tmp_p, parent_hobjid.idx); - if(H5HG_remove(dst->u.vlen.f, dxpl_id,&parent_hobjid)<0) - HGOTO_ERROR(H5E_DATATYPE, H5E_WRITEERROR, FAIL, "Unable to remove heap object"); + /* Convert VL sequence */ + H5_CHECK_OVERFLOW(seq_len,hssize_t,hsize_t); + if (H5T_convert(tpath, tsrc_id, tdst_id, (hsize_t)seq_len, 0, 0, conv_buf, tmp_buf, dxpl_id)<0) + HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "datatype conversion failed"); + + /* Write sequence to destination location */ + if((*(dst->u.vlen.write))(dst->u.vlen.f,dxpl_id,&vl_alloc_info,d,conv_buf, b, (hsize_t)seq_len,(hsize_t)dst_base_size)<0) + HGOTO_ERROR(H5E_DATATYPE, H5E_WRITEERROR, FAIL, "can't write VL data"); + + /* For nested VL case, free leftover heap objects from the deeper level if the length of new data elements is shorter than the old data elements.*/ + H5_CHECK_OVERFLOW(bg_seq_len,hsize_t,hssize_t); + if(nested && seq_len<(hssize_t)bg_seq_len) { + uint8_t *tmp_p=tmp_buf; + tmp_p += seq_len*dst_base_size; + for(i=0; i<(bg_seq_len-seq_len); i++) { + UINT32DECODE(tmp_p, parent_seq_len); + if(parent_seq_len>0) { + H5F_addr_decode(dst->u.vlen.f, (const uint8_t **)&tmp_p, &(parent_hobjid.addr)); + INT32DECODE(tmp_p, parent_hobjid.idx); + if(H5HG_remove(dst->u.vlen.f, dxpl_id,&parent_hobjid)<0) + HGOTO_ERROR(H5E_DATATYPE, H5E_WRITEERROR, FAIL, "Unable to remove heap object"); + } } } - } - } /* end else */ - - /* - * If we had used a temporary buffer for the destination - * then we should copy the value to the true destination - * buffer. - */ - if (d==dbuf) HDmemcpy (dp, d, dst->size); - sp += src_delta; - dp += dst_delta; - if(bg_ptr!=NULL) - bg_ptr += bkg_delta; - - /* switch destination pointer around when the olap gets to 0 */ - if(--olap==0) { - if(dptr==(uint8_t **)&dbuf) - dptr=&dp; - else - dptr=(uint8_t **)&dbuf; - } /* end if */ - } + } /* end else */ + + /* Advance pointers */ + s += s_stride; + d += d_stride; + b += b_stride; + } /* end for */ + + /* Decrement number of elements left to convert */ + nelmts-=safe; + } /* end while */ /* Release the temporary datatype IDs used */ if (tsrc_id >= 0) @@ -2695,9 +2681,6 @@ done: /* Release the background buffer, if we have one */ if(tmp_buf!=NULL) H5FL_BLK_FREE(vlen_seq,tmp_buf); - /* Release the destination buffer, if we have one */ - if(dbuf!=NULL) - H5FL_BLK_FREE(vlen_seq,dbuf); FUNC_LEAVE_NOAPI(ret_value); } diff --git a/src/H5Tprivate.h b/src/H5Tprivate.h index 41b5828..5c76e82 100644 --- a/src/H5Tprivate.h +++ b/src/H5Tprivate.h @@ -88,8 +88,8 @@ H5_DLL herr_t H5T_convert(H5T_path_t *tpath, hid_t src_id, hid_t dst_id, void *buf, void *bkg, hid_t dset_xfer_plist); H5_DLL herr_t H5T_vlen_reclaim(void *elem, hid_t type_id, hsize_t ndim, hssize_t *point, void *_op_data); H5_DLL herr_t H5T_vlen_get_alloc_info(hid_t dxpl_id, H5T_vlen_alloc_info_t *vl_alloc_info); -H5_DLL htri_t H5T_is_sensible(const H5T_t *dt); H5_DLL htri_t H5T_set_loc(H5T_t *dt, H5F_t *f, H5T_loc_t loc); +H5_DLL htri_t H5T_is_sensible(const H5T_t *dt); H5_DLL htri_t H5T_committed(H5T_t *type); H5_DLL int H5T_link(const H5T_t *type, int adjust, hid_t dxpl_id); diff --git a/src/H5Tvlen.c b/src/H5Tvlen.c index 09e67fb..a2ef55a 100644 --- a/src/H5Tvlen.c +++ b/src/H5Tvlen.c @@ -1091,4 +1091,3 @@ H5T_vlen_get_alloc_info(hid_t dxpl_id, H5T_vlen_alloc_info_t *vl_alloc_info) done: FUNC_LEAVE_NOAPI(ret_value) } /* end H5T_vlen_get_alloc_info() */ - -- cgit v0.12