summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorQuincey Koziol <koziol@hdfgroup.org>2004-06-13 01:00:24 (GMT)
committerQuincey Koziol <koziol@hdfgroup.org>2004-06-13 01:00:24 (GMT)
commit53a9be37483c91015e4f3060490bec4a3a1495c2 (patch)
tree51286fdb6c54930601fb8b6359d910ca3e52a744
parent8b247f8aa3d958bbf8f4c8515ab60a4daedbdf5c (diff)
downloadhdf5-53a9be37483c91015e4f3060490bec4a3a1495c2.zip
hdf5-53a9be37483c91015e4f3060490bec4a3a1495c2.tar.gz
hdf5-53a9be37483c91015e4f3060490bec4a3a1495c2.tar.bz2
[svn-r8666] Purpose:
Code optimization Description: Restructure conversion loop of variable-length objects to avoid walking through memory backwards and allocating as many temporary buffers. (This uses the optimized method used in the atomic type conversions) Platforms tested: Solaris 2.7 (arabica) FreeBSD 4.10 (sleipnir) w/parallel Too minor to require h5committest
-rw-r--r--src/H5Tconv.c309
-rw-r--r--src/H5Tprivate.h2
-rw-r--r--src/H5Tvlen.c1
3 files changed, 147 insertions, 165 deletions
diff --git a/src/H5Tconv.c b/src/H5Tconv.c
index 89b33de..929ddbc 100644
--- a/src/H5Tconv.c
+++ b/src/H5Tconv.c
@@ -2420,20 +2420,21 @@ done:
*/
herr_t
H5T_conv_vlen(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, hsize_t nelmts,
- size_t buf_stride, size_t bkg_stride, void *_buf,
- void *_bkg, hid_t dxpl_id)
+ size_t buf_stride, size_t bkg_stride, void *buf,
+ void *bkg, hid_t dxpl_id)
{
H5T_vlen_alloc_info_t vl_alloc_info;/* VL allocation information */
H5T_path_t *tpath; /* Type conversion path */
hid_t tsrc_id = -1, tdst_id = -1;/*temporary type atoms */
H5T_t *src = NULL; /*source data type */
H5T_t *dst = NULL; /*destination data type */
- hsize_t olap; /*num overlapping elements */
- uint8_t *s, *sp, *d, *dp; /*source and dest traversal ptrs */
- uint8_t **dptr; /*pointer to correct destination pointer*/
- uint8_t *bg_ptr=NULL; /*background buf traversal pointer */
H5HG_t bg_hobjid, parent_hobjid;
- size_t src_delta, dst_delta, bkg_delta;/*source & destination stride*/
+ uint8_t *s; /*source buffer */
+ uint8_t *d; /*destination buffer */
+ uint8_t *b; /*background buffer */
+ ssize_t s_stride, d_stride; /*src and dst strides */
+ ssize_t b_stride; /*bkg stride */
+ size_t safe; /*how many elements are safe to process in each pass */
hssize_t seq_len; /*the number of elements in the current sequence*/
hsize_t bg_seq_len=0, parent_seq_len=0;
size_t src_base_size, dst_base_size;/*source & destination base size*/
@@ -2442,8 +2443,6 @@ H5T_conv_vlen(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, hsize_t nelmts,
size_t conv_buf_size=0; /*size of conversion buffer in bytes */
void *tmp_buf=NULL; /*temporary background buffer */
size_t tmp_buf_size=0; /*size of temporary bkg buffer */
- void *dbuf=NULL; /*temp destination buffer */
- int direction; /*direction of traversal */
int nested=0; /*flag of nested VL case */
hsize_t elmtno; /*element number counter */
hsize_t i;
@@ -2483,57 +2482,23 @@ H5T_conv_vlen(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, hsize_t nelmts,
NULL == (dst = H5I_object(dst_id)))
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a data type");
- /*
- * Do we process the values from beginning to end or vice
- * versa? Also, how many of the elements have the source and
- * destination areas overlapping?
- */
- if (src->size==dst->size || buf_stride>0) {
- olap = nelmts;
- sp = dp = (uint8_t*)_buf;
- bg_ptr = (uint8_t*)_bkg;
- direction = 1;
- } else if (src->size>=dst->size) {
- /* potentially this uses the destination buffer 1 extra
- * time, but its faster that floating-point calcs */
- olap = ((dst->size)/(src->size-dst->size))+1;
- sp = dp = (uint8_t*)_buf;
- bg_ptr = (uint8_t*)_bkg;
- direction = 1;
- } else {
- /* potentially this uses the destination buffer 1 extra
- * time, but its faster that floating-point calcs */
- olap = nelmts-(((src->size)/(dst->size-src->size))+1);
- sp = (uint8_t*)_buf + (nelmts-1) *
- (buf_stride ? buf_stride : src->size);
- dp = (uint8_t*)_buf + (nelmts-1) *
- (buf_stride ? buf_stride : dst->size);
- if(_bkg!=NULL)
- bg_ptr = (uint8_t*)_bkg + (nelmts-1) *
- (bkg_stride ? bkg_stride : dst->size);
- direction = -1;
- }
-
- /*
- * Direction & size of buffer traversal.
- */
- src_delta = direction * (buf_stride ? buf_stride : src->size);
- dst_delta = direction * (buf_stride ? buf_stride : dst->size);
- bkg_delta = direction * (bkg_stride ? bkg_stride : dst->size);
-
- /* Dynamically allocate the destination buffer */
- if ((dbuf=H5FL_BLK_MALLOC(vlen_seq,dst->size))==NULL)
- HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for type conversion");
-
- /*
- * If the source and destination buffers overlap then use a
- * temporary buffer for the destination.
- */
- if (direction>0) {
- dptr = (uint8_t **)&dbuf;
+ /* Initialize source & destination strides */
+ if (buf_stride) {
+ assert(buf_stride>=src->size);
+ assert(buf_stride>=dst->size);
+ s_stride = d_stride = buf_stride;
} else {
- dptr = &dp;
+ s_stride = src->size;
+ d_stride = dst->size;
}
+ if(bkg) {
+ if(bkg_stride)
+ b_stride=bkg_stride;
+ else
+ b_stride=d_stride;
+ } /* end if */
+ else
+ b_stride=0;
/* Get the size of the base types in src & dst */
src_base_size=H5T_get_size(src->parent);
@@ -2561,121 +2526,142 @@ H5T_conv_vlen(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, hsize_t nelmts,
HGOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "unable to retrieve VL allocation info");
/* Set the flag for nested VL case */
- if(dst->u.vlen.f!=NULL && H5T_detect_class(dst->parent,H5T_VLEN) && bg_ptr!=NULL)
+ if(dst->u.vlen.f!=NULL && H5T_detect_class(dst->parent,H5T_VLEN) && bkg!=NULL)
nested=1;
- for (elmtno=0; elmtno<nelmts; elmtno++) {
- s = sp;
- d = *dptr;
-
- /* Check for "nil" source sequence */
- if((*(src->u.vlen.isnull))(src->u.vlen.f,s)) {
- /* Write "nil" sequence to destination location */
- if((*(dst->u.vlen.setnull))(dst->u.vlen.f,dxpl_id,d,bg_ptr)<0)
- HGOTO_ERROR(H5E_DATATYPE, H5E_WRITEERROR, FAIL, "can't set VL data to 'nil'");
+ /* The outer loop of the type conversion macro, controlling which */
+ /* direction the buffer is walked */
+ while (nelmts>0) {
+ /* Check if we need to go backwards through the buffer */
+ if(d_stride>s_stride) {
+ /* Compute the number of "safe" destination elements at */
+ /* the end of the buffer (Those which don't overlap with */
+ /* any source elements at the beginning of the buffer) */
+ safe=nelmts-(((nelmts*s_stride)+(d_stride-1))/d_stride);
+
+ /* If we're down to the last few elements, just wrap up */
+ /* with a "real" reverse copy */
+ if(safe<2) {
+ s = (uint8_t*)buf+(nelmts-1)*s_stride;
+ d = (uint8_t*)buf+(nelmts-1)*d_stride;
+ b = (uint8_t*)bkg+(nelmts-1)*b_stride;
+ s_stride = -s_stride;
+ d_stride = -d_stride;
+ b_stride = -b_stride;
+
+ safe=nelmts;
+ } /* end if */
+ else {
+ s = (uint8_t*)buf+(nelmts-safe)*s_stride;
+ d = (uint8_t*)buf+(nelmts-safe)*d_stride;
+ b = (uint8_t*)bkg+(nelmts-safe)*b_stride;
+ } /* end else */
} /* end if */
else {
- /* Get length of element sequences */
- if((seq_len=(*(src->u.vlen.getlen))(s))<0)
- HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "incorrect length");
- H5_CHECK_OVERFLOW(seq_len,hssize_t,size_t);
- src_size=(size_t)seq_len*src_base_size;
- dst_size=(size_t)seq_len*dst_base_size;
-
- /* Check if conversion buffer is large enough, resize if
- * necessary */
- if(conv_buf_size<MAX(src_size,dst_size)) {
- conv_buf_size=MAX(src_size,dst_size);
- if((conv_buf=H5FL_BLK_REALLOC(vlen_seq,conv_buf, conv_buf_size))==NULL)
- HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for type conversion");
- } /* end if */
+ /* Single forward pass over all data */
+ s = d = buf;
+ b = bkg;
+ safe=nelmts;
+ } /* end else */
- /* Read in VL sequence */
- if((*(src->u.vlen.read))(src->u.vlen.f,dxpl_id,s,conv_buf,src_size)<0)
- HGOTO_ERROR(H5E_DATATYPE, H5E_READERROR, FAIL, "can't read VL data");
-
- /* Check if temporary buffer is large enough, resize if necessary */
- /* (Chain off the conversion buffer size) */
- if((tpath->cdata.need_bkg || H5T_detect_class(dst->parent, H5T_VLEN))
- && tmp_buf_size<conv_buf_size) {
- /* Set up initial background buffer */
- tmp_buf_size=conv_buf_size;
- if((tmp_buf=H5FL_BLK_REALLOC(vlen_seq,tmp_buf,tmp_buf_size))==NULL)
- HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for type conversion");
+ for (elmtno=0; elmtno<safe; elmtno++) {
+ /* Check for "nil" source sequence */
+ if((*(src->u.vlen.isnull))(src->u.vlen.f,s)) {
+ /* Write "nil" sequence to destination location */
+ if((*(dst->u.vlen.setnull))(dst->u.vlen.f,dxpl_id,d,b)<0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_WRITEERROR, FAIL, "can't set VL data to 'nil'");
} /* end if */
+ else {
+ /* Get length of element sequences */
+ if((seq_len=(*(src->u.vlen.getlen))(s))<0)
+ HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "incorrect length");
+ H5_CHECK_OVERFLOW(seq_len,hssize_t,size_t);
+ src_size=(size_t)seq_len*src_base_size;
+ dst_size=(size_t)seq_len*dst_base_size;
+
+ /* Check if conversion buffer is large enough, resize if
+ * necessary */
+ if(conv_buf_size<MAX(src_size,dst_size)) {
+ conv_buf_size=MAX(src_size,dst_size);
+ if((conv_buf=H5FL_BLK_REALLOC(vlen_seq,conv_buf, conv_buf_size))==NULL)
+ HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for type conversion");
+ } /* end if */
- /* If we are writing and there is a nested VL type, read
- * the sequence into the background buffer */
- if(nested) {
- uint8_t *tmp=bg_ptr;
- UINT32DECODE(tmp, bg_seq_len);
-
- if(bg_seq_len>0) {
- H5_CHECK_OVERFLOW( bg_seq_len*MAX(src_base_size,dst_base_size) ,hsize_t,size_t);
- if(tmp_buf_size<(size_t)(bg_seq_len*MAX(src_base_size, dst_base_size))) {
- tmp_buf_size=(size_t)(bg_seq_len*MAX(src_base_size, dst_base_size));
- if((tmp_buf=H5FL_BLK_REALLOC(vlen_seq,tmp_buf, tmp_buf_size))==NULL)
- HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for type conversion");
- }
- H5F_addr_decode(dst->u.vlen.f, (const uint8_t **)&tmp, &(bg_hobjid.addr));
- INT32DECODE(tmp, bg_hobjid.idx);
- if(H5HG_read(dst->u.vlen.f,dxpl_id,&bg_hobjid,tmp_buf)==NULL)
- HGOTO_ERROR (H5E_DATATYPE, H5E_READERROR, FAIL, "can't read VL sequence into background buffer");
+ /* Read in VL sequence */
+ if((*(src->u.vlen.read))(src->u.vlen.f,dxpl_id,s,conv_buf,src_size)<0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_READERROR, FAIL, "can't read VL data");
+
+ /* Check if temporary buffer is large enough, resize if necessary */
+ /* (Chain off the conversion buffer size) */
+ if(tmp_buf && tmp_buf_size<conv_buf_size) {
+ /* Set up initial background buffer */
+ tmp_buf_size=conv_buf_size;
+ if((tmp_buf=H5FL_BLK_REALLOC(vlen_seq,tmp_buf,tmp_buf_size))==NULL)
+ HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for type conversion");
} /* end if */
- /* If the sequence gets shorter, pad out the original sequence with zeros */
- H5_CHECK_OVERFLOW(bg_seq_len,hsize_t,hssize_t);
- if((hssize_t)bg_seq_len<seq_len) {
- H5_CHECK_OVERFLOW((seq_len-bg_seq_len),hsize_t,size_t);
- HDmemset((uint8_t *)tmp_buf+dst_base_size*bg_seq_len,0,(size_t)(seq_len-bg_seq_len)*dst_base_size);
+ /* If we are writing and there is a nested VL type, read
+ * the sequence into the background buffer */
+ if(nested) {
+ uint8_t *tmp=b;
+ UINT32DECODE(tmp, bg_seq_len);
+
+ if(bg_seq_len>0) {
+ H5_CHECK_OVERFLOW( bg_seq_len*MAX(src_base_size,dst_base_size) ,hsize_t,size_t);
+ if(tmp_buf_size<(size_t)(bg_seq_len*MAX(src_base_size, dst_base_size))) {
+ tmp_buf_size=(size_t)(bg_seq_len*MAX(src_base_size, dst_base_size));
+ if((tmp_buf=H5FL_BLK_REALLOC(vlen_seq,tmp_buf, tmp_buf_size))==NULL)
+ HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for type conversion");
+ }
+ H5F_addr_decode(dst->u.vlen.f, (const uint8_t **)&tmp, &(bg_hobjid.addr));
+ INT32DECODE(tmp, bg_hobjid.idx);
+ if(H5HG_read(dst->u.vlen.f,dxpl_id,&bg_hobjid,tmp_buf)==NULL)
+ HGOTO_ERROR (H5E_DATATYPE, H5E_READERROR, FAIL, "can't read VL sequence into background buffer");
+ } /* end if */
+
+ /* If the sequence gets shorter, pad out the original sequence with zeros */
+ H5_CHECK_OVERFLOW(bg_seq_len,hsize_t,hssize_t);
+ if((hssize_t)bg_seq_len<seq_len) {
+ H5_CHECK_OVERFLOW((seq_len-bg_seq_len),hsize_t,size_t);
+ HDmemset((uint8_t *)tmp_buf+dst_base_size*bg_seq_len,0,(size_t)(seq_len-bg_seq_len)*dst_base_size);
+ } /* end if */
} /* end if */
- } /* end if */
- /* Convert VL sequence */
- H5_CHECK_OVERFLOW(seq_len,hssize_t,hsize_t);
- if (H5T_convert(tpath, tsrc_id, tdst_id, (hsize_t)seq_len, 0, 0, conv_buf, tmp_buf, dxpl_id)<0)
- HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "datatype conversion failed");
-
- /* Write sequence to destination location */
- if((*(dst->u.vlen.write))(dst->u.vlen.f,dxpl_id,&vl_alloc_info,d,conv_buf, bg_ptr, (hsize_t)seq_len,(hsize_t)dst_base_size)<0)
- HGOTO_ERROR(H5E_DATATYPE, H5E_WRITEERROR, FAIL, "can't write VL data");
-
- /* For nested VL case, free leftover heap objects from the deeper level if the length of new data elements is shorted than the old data elements.*/
- H5_CHECK_OVERFLOW(bg_seq_len,hsize_t,hssize_t);
- if(nested && seq_len<(hssize_t)bg_seq_len) {
- uint8_t *tmp_p=tmp_buf;
- tmp_p += seq_len*dst_base_size;
- for(i=0; i<(bg_seq_len-seq_len); i++) {
- UINT32DECODE(tmp_p, parent_seq_len);
- if(parent_seq_len>0) {
- H5F_addr_decode(dst->u.vlen.f, (const uint8_t **)&tmp_p, &(parent_hobjid.addr));
- INT32DECODE(tmp_p, parent_hobjid.idx);
- if(H5HG_remove(dst->u.vlen.f, dxpl_id,&parent_hobjid)<0)
- HGOTO_ERROR(H5E_DATATYPE, H5E_WRITEERROR, FAIL, "Unable to remove heap object");
+ /* Convert VL sequence */
+ H5_CHECK_OVERFLOW(seq_len,hssize_t,hsize_t);
+ if (H5T_convert(tpath, tsrc_id, tdst_id, (hsize_t)seq_len, 0, 0, conv_buf, tmp_buf, dxpl_id)<0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "datatype conversion failed");
+
+ /* Write sequence to destination location */
+ if((*(dst->u.vlen.write))(dst->u.vlen.f,dxpl_id,&vl_alloc_info,d,conv_buf, b, (hsize_t)seq_len,(hsize_t)dst_base_size)<0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_WRITEERROR, FAIL, "can't write VL data");
+
+ /* For nested VL case, free leftover heap objects from the deeper level if the length of new data elements is shorter than the old data elements.*/
+ H5_CHECK_OVERFLOW(bg_seq_len,hsize_t,hssize_t);
+ if(nested && seq_len<(hssize_t)bg_seq_len) {
+ uint8_t *tmp_p=tmp_buf;
+ tmp_p += seq_len*dst_base_size;
+ for(i=0; i<(bg_seq_len-seq_len); i++) {
+ UINT32DECODE(tmp_p, parent_seq_len);
+ if(parent_seq_len>0) {
+ H5F_addr_decode(dst->u.vlen.f, (const uint8_t **)&tmp_p, &(parent_hobjid.addr));
+ INT32DECODE(tmp_p, parent_hobjid.idx);
+ if(H5HG_remove(dst->u.vlen.f, dxpl_id,&parent_hobjid)<0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_WRITEERROR, FAIL, "Unable to remove heap object");
+ }
}
}
- }
- } /* end else */
-
- /*
- * If we had used a temporary buffer for the destination
- * then we should copy the value to the true destination
- * buffer.
- */
- if (d==dbuf) HDmemcpy (dp, d, dst->size);
- sp += src_delta;
- dp += dst_delta;
- if(bg_ptr!=NULL)
- bg_ptr += bkg_delta;
-
- /* switch destination pointer around when the olap gets to 0 */
- if(--olap==0) {
- if(dptr==(uint8_t **)&dbuf)
- dptr=&dp;
- else
- dptr=(uint8_t **)&dbuf;
- } /* end if */
- }
+ } /* end else */
+
+ /* Advance pointers */
+ s += s_stride;
+ d += d_stride;
+ b += b_stride;
+ } /* end for */
+
+ /* Decrement number of elements left to convert */
+ nelmts-=safe;
+ } /* end while */
/* Release the temporary datatype IDs used */
if (tsrc_id >= 0)
@@ -2695,9 +2681,6 @@ done:
/* Release the background buffer, if we have one */
if(tmp_buf!=NULL)
H5FL_BLK_FREE(vlen_seq,tmp_buf);
- /* Release the destination buffer, if we have one */
- if(dbuf!=NULL)
- H5FL_BLK_FREE(vlen_seq,dbuf);
FUNC_LEAVE_NOAPI(ret_value);
}
diff --git a/src/H5Tprivate.h b/src/H5Tprivate.h
index 41b5828..5c76e82 100644
--- a/src/H5Tprivate.h
+++ b/src/H5Tprivate.h
@@ -88,8 +88,8 @@ H5_DLL herr_t H5T_convert(H5T_path_t *tpath, hid_t src_id, hid_t dst_id,
void *buf, void *bkg, hid_t dset_xfer_plist);
H5_DLL herr_t H5T_vlen_reclaim(void *elem, hid_t type_id, hsize_t ndim, hssize_t *point, void *_op_data);
H5_DLL herr_t H5T_vlen_get_alloc_info(hid_t dxpl_id, H5T_vlen_alloc_info_t *vl_alloc_info);
-H5_DLL htri_t H5T_is_sensible(const H5T_t *dt);
H5_DLL htri_t H5T_set_loc(H5T_t *dt, H5F_t *f, H5T_loc_t loc);
+H5_DLL htri_t H5T_is_sensible(const H5T_t *dt);
H5_DLL htri_t H5T_committed(H5T_t *type);
H5_DLL int H5T_link(const H5T_t *type, int adjust, hid_t dxpl_id);
diff --git a/src/H5Tvlen.c b/src/H5Tvlen.c
index 09e67fb..a2ef55a 100644
--- a/src/H5Tvlen.c
+++ b/src/H5Tvlen.c
@@ -1091,4 +1091,3 @@ H5T_vlen_get_alloc_info(hid_t dxpl_id, H5T_vlen_alloc_info_t *vl_alloc_info)
done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5T_vlen_get_alloc_info() */
-