summaryrefslogtreecommitdiffstats
path: root/src/H5Tconv.c
diff options
context:
space:
mode:
authorQuincey Koziol <koziol@hdfgroup.org>2004-06-13 01:01:46 (GMT)
committerQuincey Koziol <koziol@hdfgroup.org>2004-06-13 01:01:46 (GMT)
commit7dfe108b5329205732ab93f494e6287cc3090f21 (patch)
tree1c7906c6461cdd2fcc18ebb1ba29217a30c0b189 /src/H5Tconv.c
parenta3668504d93f7015661ad63c29190bceb2fd2fe4 (diff)
downloadhdf5-7dfe108b5329205732ab93f494e6287cc3090f21.zip
hdf5-7dfe108b5329205732ab93f494e6287cc3090f21.tar.gz
hdf5-7dfe108b5329205732ab93f494e6287cc3090f21.tar.bz2
[svn-r8667] Purpose:
Code optimization. Description: Restructure the conversion loop for variable-length objects to avoid walking through memory backwards and to avoid allocating as many temporary buffers. (This uses the same optimized method that is used in the atomic type conversions.) Also bring back another optimization for variable-length datatypes that avoids querying the DXPL so many times. Platforms tested: Solaris 2.7 (arabica); FreeBSD 4.10 (sleipnir) w/parallel. Too minor to require h5committest.
Diffstat (limited to 'src/H5Tconv.c')
-rw-r--r--src/H5Tconv.c317
1 files changed, 150 insertions, 167 deletions
diff --git a/src/H5Tconv.c b/src/H5Tconv.c
index 15f0701..f1b75b6 100644
--- a/src/H5Tconv.c
+++ b/src/H5Tconv.c
@@ -2219,19 +2219,21 @@ done:
*/
herr_t
H5T_conv_vlen(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, hsize_t nelmts,
- size_t buf_stride, size_t bkg_stride, void *_buf,
- void *_bkg, hid_t dxpl_id)
+ size_t buf_stride, size_t bkg_stride, void *buf,
+ void *bkg, hid_t dxpl_id)
{
+ H5T_vlen_alloc_info_t vl_alloc_info;/* VL allocation information */
H5T_path_t *tpath; /* Type conversion path */
hid_t tsrc_id = -1, tdst_id = -1;/*temporary type atoms */
H5T_t *src = NULL; /*source data type */
H5T_t *dst = NULL; /*destination data type */
- hsize_t olap; /*num overlapping elements */
- uint8_t *s, *sp, *d, *dp; /*source and dest traversal ptrs */
- uint8_t **dptr; /*pointer to correct destination pointer*/
- uint8_t *bg_ptr=NULL; /*background buf traversal pointer */
H5HG_t bg_hobjid, parent_hobjid;
- size_t src_delta, dst_delta, bkg_delta;/*source & destination stride*/
+ uint8_t *s; /*source buffer */
+ uint8_t *d; /*destination buffer */
+ uint8_t *b; /*background buffer */
+ ssize_t s_stride, d_stride; /*src and dst strides */
+ ssize_t b_stride; /*bkg stride */
+ size_t safe; /*how many elements are safe to process in each pass */
hssize_t seq_len; /*the number of elements in the current sequence*/
hsize_t bg_seq_len=0, parent_seq_len=0;
size_t src_base_size, dst_base_size;/*source & destination base size*/
@@ -2240,8 +2242,6 @@ H5T_conv_vlen(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, hsize_t nelmts,
size_t conv_buf_size=0; /*size of conversion buffer in bytes */
void *tmp_buf=NULL; /*temporary background buffer */
size_t tmp_buf_size=0; /*size of temporary bkg buffer */
- void *dbuf=NULL; /*temp destination buffer */
- int direction; /*direction of traversal */
int nested=0; /*flag of nested VL case */
hsize_t elmtno; /*element number counter */
hsize_t i;
@@ -2281,67 +2281,28 @@ H5T_conv_vlen(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, hsize_t nelmts,
NULL == (dst = H5I_object(dst_id)))
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a data type");
- /*
- * Do we process the values from beginning to end or vice
- * versa? Also, how many of the elements have the source and
- * destination areas overlapping?
- */
- if (src->size==dst->size || buf_stride>0) {
- olap = nelmts;
- sp = dp = (uint8_t*)_buf;
- bg_ptr = (uint8_t*)_bkg;
- direction = 1;
- } else if (src->size>=dst->size) {
- /* potentially this uses the destination buffer 1 extra
- * time, but its faster that floating-point calcs */
- olap = ((dst->size)/(src->size-dst->size))+1;
- sp = dp = (uint8_t*)_buf;
- bg_ptr = (uint8_t*)_bkg;
- direction = 1;
- } else {
- /* potentially this uses the destination buffer 1 extra
- * time, but its faster that floating-point calcs */
- olap = nelmts-(((src->size)/(dst->size-src->size))+1);
- sp = (uint8_t*)_buf + (nelmts-1) *
- (buf_stride ? buf_stride : src->size);
- dp = (uint8_t*)_buf + (nelmts-1) *
- (buf_stride ? buf_stride : dst->size);
- if(_bkg!=NULL)
- bg_ptr = (uint8_t*)_bkg + (nelmts-1) *
- (bkg_stride ? bkg_stride : dst->size);
- direction = -1;
- }
-
- /*
- * Direction & size of buffer traversal.
- */
- src_delta = direction * (buf_stride ? buf_stride : src->size);
- dst_delta = direction * (buf_stride ? buf_stride : dst->size);
- bkg_delta = direction * (bkg_stride ? bkg_stride : dst->size);
-
- /* Dynamically allocate the destination buffer */
- if ((dbuf=H5FL_BLK_MALLOC(vlen_seq,dst->size))==NULL)
- HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for type conversion");
-
- /*
- * If the source and destination buffers overlap then use a
- * temporary buffer for the destination.
- */
- if (direction>0) {
- dptr = (uint8_t **)&dbuf;
+ /* Initialize source & destination strides */
+ if (buf_stride) {
+ assert(buf_stride>=src->size);
+ assert(buf_stride>=dst->size);
+ s_stride = d_stride = buf_stride;
} else {
- dptr = &dp;
+ s_stride = src->size;
+ d_stride = dst->size;
}
+ if(bkg) {
+ if(bkg_stride)
+ b_stride=bkg_stride;
+ else
+ b_stride=d_stride;
+ } /* end if */
+ else
+ b_stride=0;
/* Get the size of the base types in src & dst */
src_base_size=H5T_get_size(src->parent);
dst_base_size=H5T_get_size(dst->parent);
- /* Get initial conversion buffer */
- conv_buf_size=MAX(src_base_size,dst_base_size);
- if ((conv_buf=H5FL_BLK_CALLOC(vlen_seq,conv_buf_size))==NULL)
- HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for type conversion");
-
/* Set up conversion path for base elements */
if (NULL==(tpath=H5T_path_find(src->parent, dst->parent, NULL, NULL, dxpl_id))) {
HGOTO_ERROR(H5E_DATATYPE, H5E_UNSUPPORTED, FAIL, "unable to convert between src and dest datatypes");
@@ -2359,122 +2320,147 @@ H5T_conv_vlen(hid_t src_id, hid_t dst_id, H5T_cdata_t *cdata, hsize_t nelmts,
HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for type conversion");
} /* end if */
+ /* Get the allocation info */
+ if(H5T_vlen_get_alloc_info(dxpl_id,&vl_alloc_info)<0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTGET, FAIL, "unable to retrieve VL allocation info");
+
/* Set the flag for nested VL case */
- if(dst->u.vlen.f!=NULL && H5T_detect_class(dst->parent,H5T_VLEN) && bg_ptr!=NULL)
+ if(dst->u.vlen.f!=NULL && H5T_detect_class(dst->parent,H5T_VLEN) && bkg!=NULL)
nested=1;
- for (elmtno=0; elmtno<nelmts; elmtno++) {
- s = sp;
- d = *dptr;
-
- /* Check for "nil" source sequence */
- if((*(src->u.vlen.isnull))(src->u.vlen.f,s)) {
- /* Write "nil" sequence to destination location */
- if((*(dst->u.vlen.setnull))(dst->u.vlen.f,dxpl_id,d,bg_ptr)<0)
- HGOTO_ERROR(H5E_DATATYPE, H5E_WRITEERROR, FAIL, "can't set VL data to 'nil'");
+ /* The outer loop of the type conversion macro, controlling which */
+ /* direction the buffer is walked */
+ while (nelmts>0) {
+ /* Check if we need to go backwards through the buffer */
+ if(d_stride>s_stride) {
+ /* Compute the number of "safe" destination elements at */
+ /* the end of the buffer (Those which don't overlap with */
+ /* any source elements at the beginning of the buffer) */
+ safe=nelmts-(((nelmts*s_stride)+(d_stride-1))/d_stride);
+
+ /* If we're down to the last few elements, just wrap up */
+ /* with a "real" reverse copy */
+ if(safe<2) {
+ s = (uint8_t*)buf+(nelmts-1)*s_stride;
+ d = (uint8_t*)buf+(nelmts-1)*d_stride;
+ b = (uint8_t*)bkg+(nelmts-1)*b_stride;
+ s_stride = -s_stride;
+ d_stride = -d_stride;
+ b_stride = -b_stride;
+
+ safe=nelmts;
+ } /* end if */
+ else {
+ s = (uint8_t*)buf+(nelmts-safe)*s_stride;
+ d = (uint8_t*)buf+(nelmts-safe)*d_stride;
+ b = (uint8_t*)bkg+(nelmts-safe)*b_stride;
+ } /* end else */
} /* end if */
else {
- /* Get length of element sequences */
- if((seq_len=(*(src->u.vlen.getlen))(s))<0)
- HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "incorrect length");
- H5_CHECK_OVERFLOW(seq_len,hssize_t,size_t);
- src_size=(size_t)seq_len*src_base_size;
- dst_size=(size_t)seq_len*dst_base_size;
-
- /* Check if conversion buffer is large enough, resize if
- * necessary */
- if(conv_buf_size<MAX(src_size,dst_size)) {
- conv_buf_size=MAX(src_size,dst_size);
- if((conv_buf=H5FL_BLK_REALLOC(vlen_seq,conv_buf, conv_buf_size))==NULL)
- HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for type conversion");
- } /* end if */
+ /* Single forward pass over all data */
+ s = d = buf;
+ b = bkg;
+ safe=nelmts;
+ } /* end else */
- /* Read in VL sequence */
- if((*(src->u.vlen.read))(src->u.vlen.f,dxpl_id,s,conv_buf,src_size)<0)
- HGOTO_ERROR(H5E_DATATYPE, H5E_READERROR, FAIL, "can't read VL data");
-
- /* Check if temporary buffer is large enough, resize if necessary */
- /* (Chain off the conversion buffer size) */
- if((tpath->cdata.need_bkg || H5T_detect_class(dst->parent, H5T_VLEN))
- && tmp_buf_size<conv_buf_size) {
- /* Set up initial background buffer */
- tmp_buf_size=conv_buf_size;
- if((tmp_buf=H5FL_BLK_REALLOC(vlen_seq,tmp_buf,tmp_buf_size))==NULL)
- HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for type conversion");
+ for (elmtno=0; elmtno<safe; elmtno++) {
+ /* Check for "nil" source sequence */
+ if((*(src->u.vlen.isnull))(src->u.vlen.f,s)) {
+ /* Write "nil" sequence to destination location */
+ if((*(dst->u.vlen.setnull))(dst->u.vlen.f,dxpl_id,d,b)<0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_WRITEERROR, FAIL, "can't set VL data to 'nil'");
} /* end if */
+ else {
+ /* Get length of element sequences */
+ if((seq_len=(*(src->u.vlen.getlen))(s))<0)
+ HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "incorrect length");
+ H5_CHECK_OVERFLOW(seq_len,hssize_t,size_t);
+ src_size=(size_t)seq_len*src_base_size;
+ dst_size=(size_t)seq_len*dst_base_size;
+
+ /* Check if conversion buffer is large enough, resize if
+ * necessary */
+ if(conv_buf_size<MAX(src_size,dst_size)) {
+ conv_buf_size=MAX(src_size,dst_size);
+ if((conv_buf=H5FL_BLK_REALLOC(vlen_seq,conv_buf, conv_buf_size))==NULL)
+ HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for type conversion");
+ } /* end if */
- /* If we are writing and there is a nested VL type, read
- * the sequence into the background buffer */
- if(nested) {
- uint8_t *tmp=bg_ptr;
- UINT32DECODE(tmp, bg_seq_len);
- if(bg_seq_len>0) {
- H5_CHECK_OVERFLOW( bg_seq_len*MAX(src_base_size,dst_base_size) ,hsize_t,size_t);
- if(tmp_buf_size<(size_t)(bg_seq_len*MAX(src_base_size, dst_base_size))) {
- tmp_buf_size=(size_t)(bg_seq_len*MAX(src_base_size, dst_base_size));
- if((tmp_buf=H5FL_BLK_REALLOC(vlen_seq,tmp_buf, tmp_buf_size))==NULL)
- HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for type conversion");
- }
- H5F_addr_decode(dst->u.vlen.f, (const uint8_t **)&tmp, &(bg_hobjid.addr));
- INT32DECODE(tmp, bg_hobjid.idx);
- if(H5HG_read(dst->u.vlen.f,dxpl_id,&bg_hobjid,tmp_buf)==NULL)
- HGOTO_ERROR (H5E_DATATYPE, H5E_READERROR, FAIL, "can't read VL sequence into background buffer");
+ /* Read in VL sequence */
+ if((*(src->u.vlen.read))(src->u.vlen.f,dxpl_id,s,conv_buf,src_size)<0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_READERROR, FAIL, "can't read VL data");
+
+ /* Check if temporary buffer is large enough, resize if necessary */
+ /* (Chain off the conversion buffer size) */
+ if(tmp_buf && tmp_buf_size<conv_buf_size) {
+ /* Set up initial background buffer */
+ tmp_buf_size=conv_buf_size;
+ if((tmp_buf=H5FL_BLK_REALLOC(vlen_seq,tmp_buf,tmp_buf_size))==NULL)
+ HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for type conversion");
} /* end if */
- /* If the sequence gets shorter, pad out the original sequence with zeros */
- H5_CHECK_OVERFLOW(bg_seq_len,hsize_t,hssize_t);
- if((hssize_t)bg_seq_len<seq_len) {
- H5_CHECK_OVERFLOW((seq_len-bg_seq_len),hsize_t,size_t);
- HDmemset((uint8_t *)tmp_buf+dst_base_size*bg_seq_len,0,(size_t)(seq_len-bg_seq_len)*dst_base_size);
+ /* If we are writing and there is a nested VL type, read
+ * the sequence into the background buffer */
+ if(nested) {
+ uint8_t *tmp=b;
+ UINT32DECODE(tmp, bg_seq_len);
+
+ if(bg_seq_len>0) {
+ H5_CHECK_OVERFLOW( bg_seq_len*MAX(src_base_size,dst_base_size) ,hsize_t,size_t);
+ if(tmp_buf_size<(size_t)(bg_seq_len*MAX(src_base_size, dst_base_size))) {
+ tmp_buf_size=(size_t)(bg_seq_len*MAX(src_base_size, dst_base_size));
+ if((tmp_buf=H5FL_BLK_REALLOC(vlen_seq,tmp_buf, tmp_buf_size))==NULL)
+ HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, FAIL, "memory allocation failed for type conversion");
+ }
+ H5F_addr_decode(dst->u.vlen.f, (const uint8_t **)&tmp, &(bg_hobjid.addr));
+ INT32DECODE(tmp, bg_hobjid.idx);
+ if(H5HG_read(dst->u.vlen.f,dxpl_id,&bg_hobjid,tmp_buf)==NULL)
+ HGOTO_ERROR (H5E_DATATYPE, H5E_READERROR, FAIL, "can't read VL sequence into background buffer");
+ } /* end if */
+
+ /* If the sequence gets shorter, pad out the original sequence with zeros */
+ H5_CHECK_OVERFLOW(bg_seq_len,hsize_t,hssize_t);
+ if((hssize_t)bg_seq_len<seq_len) {
+ H5_CHECK_OVERFLOW((seq_len-bg_seq_len),hsize_t,size_t);
+ HDmemset((uint8_t *)tmp_buf+dst_base_size*bg_seq_len,0,(size_t)(seq_len-bg_seq_len)*dst_base_size);
+ } /* end if */
} /* end if */
- } /* end if */
- /* Convert VL sequence */
- H5_CHECK_OVERFLOW(seq_len,hssize_t,hsize_t);
- if (H5T_convert(tpath, tsrc_id, tdst_id, (hsize_t)seq_len, 0, 0, conv_buf, tmp_buf, dxpl_id)<0)
- HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "datatype conversion failed");
-
- /* Write sequence to destination location */
- if((*(dst->u.vlen.write))(dst->u.vlen.f,dxpl_id,d,conv_buf, bg_ptr, (hsize_t)seq_len,(hsize_t)dst_base_size)<0)
- HGOTO_ERROR(H5E_DATATYPE, H5E_WRITEERROR, FAIL, "can't write VL data");
-
- /* For nested VL case, free leftover heap objects from the deeper level if the length of new data elements is shorted than the old data elements.*/
- H5_CHECK_OVERFLOW(bg_seq_len,hsize_t,hssize_t);
- if(nested && seq_len<(hssize_t)bg_seq_len) {
- uint8_t *tmp_p=tmp_buf;
- tmp_p += seq_len*dst_base_size;
- for(i=0; i<(bg_seq_len-seq_len); i++) {
- UINT32DECODE(tmp_p, parent_seq_len);
- if(parent_seq_len>0) {
- H5F_addr_decode(dst->u.vlen.f, (const uint8_t **)&tmp_p, &(parent_hobjid.addr));
- INT32DECODE(tmp_p, parent_hobjid.idx);
- if(H5HG_remove(dst->u.vlen.f, dxpl_id,&parent_hobjid)<0)
- HGOTO_ERROR(H5E_DATATYPE, H5E_WRITEERROR, FAIL, "Unable to remove heap object");
+ /* Convert VL sequence */
+ H5_CHECK_OVERFLOW(seq_len,hssize_t,hsize_t);
+ if (H5T_convert(tpath, tsrc_id, tdst_id, (hsize_t)seq_len, 0, 0, conv_buf, tmp_buf, dxpl_id)<0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "datatype conversion failed");
+
+ /* Write sequence to destination location */
+ if((*(dst->u.vlen.write))(dst->u.vlen.f,dxpl_id,&vl_alloc_info,d,conv_buf, b, (hsize_t)seq_len,(hsize_t)dst_base_size)<0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_WRITEERROR, FAIL, "can't write VL data");
+
+ /* For nested VL case, free leftover heap objects from the deeper level if the length of new data elements is shorter than the old data elements.*/
+ H5_CHECK_OVERFLOW(bg_seq_len,hsize_t,hssize_t);
+ if(nested && seq_len<(hssize_t)bg_seq_len) {
+ uint8_t *tmp_p=tmp_buf;
+ tmp_p += seq_len*dst_base_size;
+ for(i=0; i<(bg_seq_len-seq_len); i++) {
+ UINT32DECODE(tmp_p, parent_seq_len);
+ if(parent_seq_len>0) {
+ H5F_addr_decode(dst->u.vlen.f, (const uint8_t **)&tmp_p, &(parent_hobjid.addr));
+ INT32DECODE(tmp_p, parent_hobjid.idx);
+ if(H5HG_remove(dst->u.vlen.f, dxpl_id,&parent_hobjid)<0)
+ HGOTO_ERROR(H5E_DATATYPE, H5E_WRITEERROR, FAIL, "Unable to remove heap object");
+ }
}
}
- }
- } /* end else */
-
+ } /* end else */
- /*
- * If we had used a temporary buffer for the destination
- * then we should copy the value to the true destination
- * buffer.
- */
- if (d==dbuf) HDmemcpy (dp, d, dst->size);
- sp += src_delta;
- dp += dst_delta;
- if(bg_ptr!=NULL)
- bg_ptr += bkg_delta;
-
- /* switch destination pointer around when the olap gets to 0 */
- if(--olap==0) {
- if(dptr==(uint8_t **)&dbuf)
- dptr=&dp;
- else
- dptr=(uint8_t **)&dbuf;
- } /* end if */
- }
+ /* Advance pointers */
+ s += s_stride;
+ d += d_stride;
+ b += b_stride;
+ } /* end for */
+
+ /* Decrement number of elements left to convert */
+ nelmts-=safe;
+ } /* end while */
/* Release the temporary datatype IDs used */
if (tsrc_id >= 0)
@@ -2494,9 +2480,6 @@ done:
/* Release the background buffer, if we have one */
if(tmp_buf!=NULL)
H5FL_BLK_FREE(vlen_seq,tmp_buf);
- /* Release the destination buffer, if we have one */
- if(dbuf!=NULL)
- H5FL_BLK_FREE(vlen_seq,dbuf);
FUNC_LEAVE_NOAPI(ret_value);
}