diff options
author | Larry Knox <lrknox@hdfgroup.org> | 2019-07-25 16:36:37 (GMT) |
---|---|---|
committer | Larry Knox <lrknox@hdfgroup.org> | 2019-07-25 16:47:12 (GMT) |
commit | 8008294578b5a133907d7ab1dd20e34735c54535 (patch) | |
tree | d1b9228d468afc05da9333567ea43a04bb0c4272 /src | |
parent | d3fdcd8a680ad0f8b21304b35e8564b774a88ef0 (diff) | |
download | hdf5-8008294578b5a133907d7ab1dd20e34735c54535.zip hdf5-8008294578b5a133907d7ab1dd20e34735c54535.tar.gz hdf5-8008294578b5a133907d7ab1dd20e34735c54535.tar.bz2 |
Squashed commit of the following:
Merge changes from update_merged_S3_HDFS branch into develop.
commit d5034315aea88629929ac0c9c59ebfafd5f21a31
Merge: 9c48823 d3fdcd8
Author: Larry Knox <lrknox@hdfgroup.org>
Date: Thu Jul 25 08:24:53 2019 -0500
Merge branch 'develop' into update_merged_S3_HDFS
Diffstat (limited to 'src')
-rw-r--r-- | src/CMakeLists.txt | 7 | ||||
-rw-r--r-- | src/H5FDhdfs.c | 2070 | ||||
-rw-r--r-- | src/H5FDhdfs.h | 122 | ||||
-rw-r--r-- | src/H5FDros3.c | 1847 | ||||
-rw-r--r-- | src/H5FDros3.h | 105 | ||||
-rw-r--r-- | src/H5FDs3comms.c | 3770 | ||||
-rw-r--r-- | src/H5FDs3comms.h | 634 | ||||
-rw-r--r-- | src/Makefile.am | 8 | ||||
-rw-r--r-- | src/hdf5.h | 18 | ||||
-rw-r--r-- | src/libhdf5.settings.in | 2 |
10 files changed, 8571 insertions, 12 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4106515..2b693bd 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -227,11 +227,14 @@ set (H5FD_SOURCES ${HDF5_SRC_DIR}/H5FDcore.c ${HDF5_SRC_DIR}/H5FDdirect.c ${HDF5_SRC_DIR}/H5FDfamily.c + ${HDF5_SRC_DIR}/H5FDhdfs.c ${HDF5_SRC_DIR}/H5FDint.c ${HDF5_SRC_DIR}/H5FDlog.c ${HDF5_SRC_DIR}/H5FDmpi.c ${HDF5_SRC_DIR}/H5FDmpio.c ${HDF5_SRC_DIR}/H5FDmulti.c + ${HDF5_SRC_DIR}/H5FDros3.c + ${HDF5_SRC_DIR}/H5FDs3comms.c ${HDF5_SRC_DIR}/H5FDsec2.c ${HDF5_SRC_DIR}/H5FDspace.c ${HDF5_SRC_DIR}/H5FDstdio.c @@ -243,11 +246,14 @@ set (H5FD_HDRS ${HDF5_SRC_DIR}/H5FDcore.h ${HDF5_SRC_DIR}/H5FDdirect.h ${HDF5_SRC_DIR}/H5FDfamily.h + ${HDF5_SRC_DIR}/H5FDhdfs.h ${HDF5_SRC_DIR}/H5FDlog.h ${HDF5_SRC_DIR}/H5FDmpi.h ${HDF5_SRC_DIR}/H5FDmpio.h ${HDF5_SRC_DIR}/H5FDmulti.h ${HDF5_SRC_DIR}/H5FDpublic.h + ${HDF5_SRC_DIR}/H5FDros3.h + ${HDF5_SRC_DIR}/H5FDs3comms.c ${HDF5_SRC_DIR}/H5FDsec2.h ${HDF5_SRC_DIR}/H5FDstdio.h ${HDF5_SRC_DIR}/H5FDwindows.h @@ -1142,6 +1148,7 @@ if (BUILD_SHARED_LIBS) add_library (${HDF5_LIBSH_TARGET} SHARED ${common_SRCS} ${shared_gen_SRCS} ${H5_PUBLIC_HEADERS} ${H5_PRIVATE_HEADERS} ${H5_GENERATED_HEADERS}) target_include_directories (${HDF5_LIBSH_TARGET} PRIVATE "${HDF5_SRC_DIR};${HDF5_BINARY_DIR};$<$<BOOL:${HDF5_ENABLE_PARALLEL}>:${MPI_C_INCLUDE_DIRS}>" + PUBLIC "$<$<BOOL:${HDF5_ENABLE_HDFS}>:${HDFS_INCLUDE_DIR}>" INTERFACE "$<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>" ) target_compile_definitions(${HDF5_LIBSH_TARGET} diff --git a/src/H5FDhdfs.c b/src/H5FDhdfs.c new file mode 100644 index 0000000..e3e11b2 --- /dev/null +++ b/src/H5FDhdfs.c @@ -0,0 +1,2070 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Read-Only HDFS Virtual File Driver (VFD) * + * Copyright (c) 2018, The HDF Group. * + * * + * All rights reserved. * + * * + * NOTICE: * + * All information contained herein is, and remains, the property of The HDF * + * Group. 
The intellectual and technical concepts contained herein are * + * proprietary to The HDF Group. Dissemination of this information or * + * reproduction of this material is strictly forbidden unless prior written * + * permission is obtained from The HDF Group. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* + * Programmer: Jacob Smith + * 2018-04-23 + * + * Purpose: Provide read-only access to files on the Hadoop Distributed + * File System (HDFS). + */ + +/* This source code file is part of the H5FD driver module */ +#include "H5FDdrvr_module.h" + +#include "H5private.h" /* Generic Functions */ +#include "H5Eprivate.h" /* Error handling */ +#include "H5FDprivate.h" /* File drivers */ +#include "H5FDhdfs.h" /* hdfs file driver */ +#include "H5FLprivate.h" /* Free Lists */ +#include "H5Iprivate.h" /* IDs */ +#include "H5MMprivate.h" /* Memory management */ + +#ifdef H5_HAVE_LIBHDFS +#include "hdfs.h" +#endif + +/* toggle function call prints: 1 turns on */ +#define HDFS_DEBUG 0 + +/* toggle stats collection and reporting */ +#define HDFS_STATS 0 + +/* The driver identification number, initialized at runtime */ +static hid_t H5FD_HDFS_g = 0; + +#if HDFS_STATS + +/* arbitrarily large value, such that any reasonable size read will be "less" + * than this value and set a true minimum + * not 0 because that may be a valid recorded minimum in degenerate cases + */ +#define HDFS_STATS_STARTING_MIN 0xfffffffful + +/* Configuration definitions for stats collection and breakdown + * + * 2^10 = 1024 + * Reads up to 1024 bytes (1 kB) fall in bin 0 + * 2^(10+(1*16)) = 2^26 = 64MB + * Reads of 64MB or greater fall in "overflow" bin[BIN_COUNT] + */ +#define HDFS_STATS_BASE 2 +#define HDFS_STATS_INTERVAL 1 +#define HDFS_STATS_START_POWER 10 +#define HDFS_STATS_BIN_COUNT 16 /* MUST BE GREATER THAN 0 */ + + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Calculate `BASE ^ (START_POWER + (INTERVAL * bin_i))` + 
* Stores result at `(unsigned long long *) out_ptr`. + * Used in computing boundaries between stats bins. + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + */ +#define HDFS_STATS_POW(bin_i, out_ptr) { \ + unsigned long long donotshadowresult = 1; \ + unsigned donotshadowindex = 0; \ + for (donotshadowindex = 0; \ + donotshadowindex < (((bin_i) * HDFS_STATS_INTERVAL) + \ + HDFS_STATS_START_POWER); \ + donotshadowindex++) \ + { \ + donotshadowresult *= HDFS_STATS_BASE; \ + } \ + *(out_ptr) = donotshadowresult; \ +} + +/* array to hold pre-computed boundaries for stats bins */ +static unsigned long long hdfs_stats_boundaries[HDFS_STATS_BIN_COUNT]; + + +/*************************************************************************** + * + * Structure: hdfs_statsbin + * + * Purpose: + * + * Structure for storing per-file hdfs VFD usage statistics. + * + * + * + * `count` (unsigned long long) + * + * Number of reads with size in this bin's range. + * + * `bytes` (unsigned long long) + * + * Total number of bytes read through this bin. + * + * `min` (unsigned long long) + * + * Smallest read size in this bin. + * + * `max` (unsigned long long) + * + * Largest read size in this bin. + * + * + * + * Programmer: Jacob Smith + * + * Changes: None + * + ***************************************************************************/ +typedef struct { + unsigned long long count; + unsigned long long bytes; + unsigned long long min; + unsigned long long max; +} hdfs_statsbin; + +#endif /* HDFS_STATS */ + +/* "unique" identifier for `hdfs_t` structures. + * Randomly generated by unweighted dice rolls. + */ +#define HDFS_HDFST_MAGIC 0x1AD5DE84 + + +/*************************************************************************** + * + * Structure: hdfs_t + * + * Purpose: + * + * Contain/retain information associated with a file hosted on Hadoop + * Distributed File System (HDFS). 
Instantiated and populated via + * `H5FD_hdfs_handle_open()` and cleaned up via `H5FD_hdfs_handle_close()`. + * + * + * + * `magic` (unisgned long) + * + * Number to indicate that this structure is of the promised + * type and should still be valid; should be HDFS_HDFST_MAGIC throughout + * the lifespan of the structure. Upon deletion of the structure, the + * programmer should set magic to anything but HDFS_HDFST_MAGIC, to + * indicate that the structure is to no longer be trusted. + * + * `filesystem` (hdfsFS) + * + * A libhdfs file system handle. + * + * `fileinfo` (hdfsFileInfo*) + * + * A pointer to a libhdfs file info structure. + * + * `file` (hdfsFile) + * + * A libhdfs file handle. + * + * + * + * Programmer: Jacob Smith + * May 2018 + * + * Changes: None + * + *************************************************************************** + */ +typedef struct { + unsigned long magic; +#ifdef H5_HAVE_LIBHDFS + hdfsFS filesystem; + hdfsFileInfo *fileinfo; + hdfsFile file; +#endif +} hdfs_t; + +#ifdef H5_HAVE_LIBHDFS + +/*-------------------------------------------------------------------------- + * Function: H5FD_hdfs_handle_open + * + * Purpose: Create a HDFS file handle, 'opening' the target file. + * + * Return: Success: Pointer to HDFS container/handle of opened file. + * Failure: NULL + * + * Programmer: Gerd Herber + * May 2018 + * + * Changes: None. 
+ *-------------------------------------------------------------------------- + */ +static hdfs_t * +H5FD_hdfs_handle_open( + const char *path, + const char *namenode_name, + const int32_t namenode_port, + const char *user_name, + const char *kerberos_ticket_cache, + const int32_t stream_buffer_size) +{ + struct hdfsBuilder *builder = NULL; + hdfs_t *handle = NULL; + hdfs_t *ret_value = NULL; + + FUNC_ENTER_NOAPI_NOINIT + +#if HDFS_DEBUG + HDfprintf(stdout, "called H5FD_hdfs_handle_open.\n"); +#endif + + if (path == NULL || path[0] == '\0') { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "path cannot be null.\n") + } + if (namenode_name == NULL /* || namenode_name[0] == '\0' */ ) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "namenode name cannot be null.\n") + } + if (namenode_port < 0 || namenode_port > 65535) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "namenode port must be non-negative and <= 65535.\n") + } + if (stream_buffer_size < 0) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "buffer size must non-negative.\n") + } + + handle = (hdfs_t *)H5MM_malloc(sizeof(hdfs_t)); + if (handle == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, NULL, + "could not malloc space for handle.\n") + } + + handle->magic = (unsigned long)HDFS_HDFST_MAGIC; + handle->filesystem = NULL; /* TODO: not a pointer; NULL may cause bug */ + handle->fileinfo = NULL; + handle->file = NULL; /* TODO: not a pointer; NULL may cause bug */ + + builder = hdfsNewBuilder(); + if (!builder) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "(hdfs) failed to create builder") + } + hdfsBuilderSetNameNode(builder, namenode_name); + hdfsBuilderSetNameNodePort(builder, (tPort)namenode_port); + if (user_name != NULL && user_name[0] != '\0') { + hdfsBuilderSetUserName(builder, user_name); + } + if (kerberos_ticket_cache != NULL && kerberos_ticket_cache[0] != '\0') { + hdfsBuilderSetKerbTicketCachePath(builder, kerberos_ticket_cache); + } + /* Call to `hdfsBuilderConnect` releases builder, regardless 
of success. */ + handle->filesystem = hdfsBuilderConnect(builder); + if (!handle->filesystem) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "(hdfs) could not connect to default namenode") + } + handle->fileinfo = hdfsGetPathInfo(handle->filesystem, path); + if (!handle->fileinfo) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "hdfsGetPathInfo failed") + } + handle->file = hdfsOpenFile( + handle->filesystem, + path, + O_RDONLY, + stream_buffer_size, + 0, + 0); + if (!handle->file) { + HGOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, NULL, + "(hdfs) could not open") + } + + ret_value = handle; + +done: + if (ret_value == NULL && handle != NULL) { + /* error; clean up */ + HDassert(handle->magic == HDFS_HDFST_MAGIC); + handle->magic++; + if (handle->file != NULL) { + if (FAIL == (hdfsCloseFile(handle->filesystem, handle->file))) { + HDONE_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, NULL, + "unable to close hdfs file handle") + } + } + if (handle->fileinfo != NULL) { + hdfsFreeFileInfo(handle->fileinfo, 1); + } + if (handle->filesystem != NULL) { + if (FAIL == (hdfsDisconnect(handle->filesystem))) { + HDONE_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, NULL, + "unable to disconnect from hdfs") + } + } + H5MM_xfree(handle); + } + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_hdfs_handle_open() */ + + +/*-------------------------------------------------------------------------- + * Function: H5FD_hdfs_handle_close + * + * Purpose: 'Close' an HDFS file container/handle, releasing underlying + * resources. + * + * Return: Success: `SUCCEED` (0) + * Failure: `FAIL` (-1) + * + * Programmer: Gerd Herber + * May 2018 + * + * Changes: None. 
+ *-------------------------------------------------------------------------- + */ +static herr_t +H5FD_hdfs_handle_close(hdfs_t *handle) +{ + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + +#if HDFS_DEBUG + HDfprintf(stdout, "called H5FD_hdfs_close.\n"); +#endif + + if (handle == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "handle cannot be null.\n") + } + if (handle->magic != HDFS_HDFST_MAGIC) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "handle has invalid magic.\n") + } + + handle->magic++; + if (handle->file != NULL) { + if (FAIL == (hdfsCloseFile(handle->filesystem, handle->file))) { + HDONE_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL, + "unable to close hdfs file handle") + } + } + if (handle->fileinfo != NULL) { + hdfsFreeFileInfo(handle->fileinfo, 1); + } + if (handle->filesystem != NULL) { + if (FAIL == (hdfsDisconnect(handle->filesystem))) { + HDONE_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL, + "unable to disconnect hdfs file system") + } + } + + H5MM_xfree(handle); + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_hdfs_close() */ +#endif /* H5_HAVE_LIBHDFS */ + + +/*************************************************************************** + * + * Structure: H5FD_hdfs_t + * + * Purpose: + * + * H5FD_hdfs_t is a structure used to store all information needed to + * maintain R/O access to a single HDF5 file in an HDFS file system. + * This structure is created when such a file is "opened" and + * discarded when it is "closed". + * + * + * `pub` (H5FD_t) + * + * Instance of H5FD_t which contains all fields common to all VFDs. + * It must be the first item in this structure, since at higher levels, + * this structure will be treated as an instance of H5FD_t. + * + * `fa` (H5FD_hdfs_fapl_t) + * + * Instance of `H5FD_hdfs_fapl_t` containing the HDFS configuration data + * needed to "open" the HDF5 file. + * + * `eoa` (haddr_t) + * + * End of addressed space in file. After open, it should always + * equal the file size. 
+ * + * `hdfs_handle` (hdfs_t *) + * + * Instance of HDFS Request handle associated with the target resource. + * Responsible for communicating with remote host and presenting file + * contents as indistinguishable from a file on the local filesystem. + * + * *** present only if HDFS_SATS is flagged to enable stats collection *** + * + * `meta` (hdfs_statsbin[]) + * `raw` (hdfs_statsbin[]) + * + * Only present if hdfs stats collection is enabled. + * + * Arrays of `hdfs_statsbin` structures to record raw- and metadata reads. + * + * Records count and size of reads performed by the VFD, and is used to + * print formatted usage statistics to stdout upon VFD shutdown. + * + * Reads of each raw- and metadata type are recorded in an individual bin + * determined by the size of the read. The last bin of each type is + * reserved for "big" reads, with no defined upper bound. + * + * *** end HDFS_STATS *** + * + * + * + * Programmer: Jacob Smith + * + * Changes: None. + * + *************************************************************************** + */ +typedef struct H5FD_hdfs_t { + H5FD_t pub; + H5FD_hdfs_fapl_t fa; + haddr_t eoa; +#ifdef H5_HAVE_LIBHDFS + hdfs_t *hdfs_handle; +#endif +#if HDFS_STATS + hdfs_statsbin meta[HDFS_STATS_BIN_COUNT + 1]; + hdfs_statsbin raw[HDFS_STATS_BIN_COUNT + 1]; +#endif +} H5FD_hdfs_t; + +/* + * These macros check for overflow of various quantities. These macros + * assume that HDoff_t is signed and haddr_t and size_t are unsigned. + * + * ADDR_OVERFLOW: Checks whether a file address of type `haddr_t' + * is too large to be represented by the second argument + * of the file seek function. 
+ * + */ +#define MAXADDR (((haddr_t)1<<(8*sizeof(HDoff_t)-1))-1) +#define ADDR_OVERFLOW(A) (HADDR_UNDEF==(A) || ((A) & ~(haddr_t)MAXADDR)) + +/* Prototypes */ +static herr_t H5FD_hdfs_term(void); +static void *H5FD_hdfs_fapl_get(H5FD_t *_file); +static void *H5FD_hdfs_fapl_copy(const void *_old_fa); +static herr_t H5FD_hdfs_fapl_free(void *_fa); +static H5FD_t *H5FD_hdfs_open(const char *name, unsigned flags, hid_t fapl_id, + haddr_t maxaddr); +static herr_t H5FD_hdfs_close(H5FD_t *_file); +static int H5FD_hdfs_cmp(const H5FD_t *_f1, const H5FD_t *_f2); +static herr_t H5FD_hdfs_query(const H5FD_t *_f1, unsigned long *flags); +static haddr_t H5FD_hdfs_get_eoa(const H5FD_t *_file, H5FD_mem_t type); +static herr_t H5FD_hdfs_set_eoa(H5FD_t *_file, H5FD_mem_t type, haddr_t addr); +static haddr_t H5FD_hdfs_get_eof(const H5FD_t *_file, H5FD_mem_t type); +static herr_t H5FD_hdfs_get_handle(H5FD_t *_file, hid_t fapl, + void** file_handle); +static herr_t H5FD_hdfs_read(H5FD_t *_file, H5FD_mem_t type, hid_t fapl_id, + haddr_t addr, size_t size, void *buf); +static herr_t H5FD_hdfs_write(H5FD_t *_file, H5FD_mem_t type, hid_t fapl_id, + haddr_t addr, size_t size, const void *buf); +static herr_t H5FD_hdfs_truncate(H5FD_t *_file, hid_t dxpl_id, + hbool_t closing); +static herr_t H5FD_hdfs_lock(H5FD_t *_file, hbool_t rw); +static herr_t H5FD_hdfs_unlock(H5FD_t *_file); +static herr_t H5FD_hdfs_validate_config(const H5FD_hdfs_fapl_t * fa); + +static const H5FD_class_t H5FD_hdfs_g = { + "hdfs", /* name */ + MAXADDR, /* maxaddr */ + H5F_CLOSE_WEAK, /* fc_degree */ + H5FD_hdfs_term, /* terminate */ + NULL, /* sb_size */ + NULL, /* sb_encode */ + NULL, /* sb_decode */ + sizeof(H5FD_hdfs_fapl_t), /* fapl_size */ + H5FD_hdfs_fapl_get, /* fapl_get */ + H5FD_hdfs_fapl_copy, /* fapl_copy */ + H5FD_hdfs_fapl_free, /* fapl_free */ + 0, /* dxpl_size */ + NULL, /* dxpl_copy */ + NULL, /* dxpl_free */ + H5FD_hdfs_open, /* open */ + H5FD_hdfs_close, /* close */ + H5FD_hdfs_cmp, /* cmp */ + 
H5FD_hdfs_query, /* query */ + NULL, /* get_type_map */ + NULL, /* alloc */ + NULL, /* free */ + H5FD_hdfs_get_eoa, /* get_eoa */ + H5FD_hdfs_set_eoa, /* set_eoa */ + H5FD_hdfs_get_eof, /* get_eof */ + H5FD_hdfs_get_handle, /* get_handle */ + H5FD_hdfs_read, /* read */ + H5FD_hdfs_write, /* write */ + NULL, /* flush */ + H5FD_hdfs_truncate, /* truncate */ + H5FD_hdfs_lock, /* lock */ + H5FD_hdfs_unlock, /* unlock */ + H5FD_FLMAP_DICHOTOMY /* fl_map */ +}; + +/* Declare a free list to manage the H5FD_hdfs_t struct */ +H5FL_DEFINE_STATIC(H5FD_hdfs_t); + + +/*------------------------------------------------------------------------- + * Function: H5FD__init_package + * + * Purpose: Initializes any interface-specific data or routines. + * + * Return: Non-negative on success/Negative on failure + * + * Changes: Rename as appropriate for hdfs vfd. + * Jacob Smith 2018 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD__init_package(void) +{ + herr_t ret_value = SUCCEED; + + FUNC_ENTER_STATIC + + if (H5FD_hdfs_init() < 0) { + HGOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, + "unable to initialize hdfs VFD") + } + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD__init_package() */ + + +/*------------------------------------------------------------------------- + * Function: H5FD_hdfs_init + * + * Purpose: Initialize this driver by registering the driver with the + * library. + * + * Return: Success: The driver ID for the hdfs driver. + * Failure: Negative + * + * Programmer: Robb Matzke + * Thursday, July 29, 1999 + * + * Changes: Rename as appropriate for hdfs vfd. 
+ * Jacob Smith 2018 + * + *------------------------------------------------------------------------- + */ +hid_t +H5FD_hdfs_init(void) +{ + hid_t ret_value = H5I_INVALID_HID; /* Return value */ + + FUNC_ENTER_NOAPI(FAIL) + +#if HDFS_DEBUG + HDfprintf(stdout, "H5FD_hdfs_init() called.\n"); +#endif + + if (H5I_VFL != H5I_get_type(H5FD_HDFS_g)) { + H5FD_HDFS_g = H5FD_register( + &H5FD_hdfs_g, + sizeof(H5FD_class_t), + FALSE); + } + +#if HDFS_STATS + /* pre-compute statsbin boundaries + */ + for (unsigned bin_i = 0; bin_i < HDFS_STATS_BIN_COUNT; bin_i++) { + unsigned long long value = 0; + HDFS_STATS_POW(bin_i, &value) + hdfs_stats_boundaries[bin_i] = value; + } +#endif + + ret_value = H5FD_HDFS_g; + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* end H5FD_hdfs_init() */ + + +/*--------------------------------------------------------------------------- + * Function: H5FD_hdfs_term + * + * Purpose: Shut down the VFD + * + * Returns: SUCCEED (Can't fail) + * + * Programmer: Quincey Koziol + * Friday, Jan 30, 2004 + * + * Changes: Rename as appropriate for hdfs vfd. + * Jacob Smith 2018 + * + *--------------------------------------------------------------------------- + */ +static herr_t +H5FD_hdfs_term(void) +{ + FUNC_ENTER_NOAPI_NOINIT_NOERR + +#if HDFS_DEBUG + HDfprintf(stdout, "H5FD_hdfs_term() called.\n"); +#endif + + /* Reset VFL ID */ + H5FD_HDFS_g = 0; + + FUNC_LEAVE_NOAPI(SUCCEED) +} /* end H5FD_hdfs_term() */ + + +/*------------------------------------------------------------------------- + * Function: H5Pset_fapl_hdfs + * + * Purpose: Modify the file access property list to use the H5FD_HDFS + * driver defined in this source file. All driver specfic + * properties are passed in as a pointer to a suitably + * initialized instance of H5FD_hdfs_fapl_t + * + * Return: SUCCEED/FAIL + * + * Programmer: John Mainzer + * 9/10/17 + * + * Changes: Rename as appropriate for hdfs vfd. 
+ * Jacob Smith 2018 + * + *------------------------------------------------------------------------- + */ +herr_t +H5Pset_fapl_hdfs(hid_t fapl_id, + H5FD_hdfs_fapl_t *fa) +{ + H5P_genplist_t *plist = NULL; /* Property list pointer */ + herr_t ret_value = FAIL; + + FUNC_ENTER_API(FAIL) + H5TRACE2("e", "i*x", fapl_id, fa); + + HDassert(fa != NULL); + +#if HDFS_DEBUG + HDfprintf(stdout, "H5Pset_fapl_hdfs() called.\n"); +#endif + + plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS); + if (plist == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, \ + "not a file access property list") + } + + if (FAIL == H5FD_hdfs_validate_config(fa)) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "invalid hdfs config") + } + + ret_value = H5P_set_driver(plist, H5FD_HDFS, (void *)fa); + +done: + FUNC_LEAVE_API(ret_value) + +} /* H5Pset_fapl_hdfs() */ + + +/*------------------------------------------------------------------------- + * Function: H5FD_hdfs_validate_config() + * + * Purpose: Test to see if the supplied instance of H5FD_hdfs_fapl_t + * contains internally consistant data. Return SUCCEED if so, + * and FAIL otherwise. + * + * Note the difference between internally consistant and + * correct. As we will have to try to access the target + * object to determine whether the supplied data is correct, + * we will settle for internal consistancy at this point + * + * Return: SUCCEED if instance of H5FD_hdfs_fapl_t contains internally + * consistant data, FAIL otherwise. + * + * Programmer: Jacob Smith + * 9/10/17 + * + * Changes: None. 
+ * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_hdfs_validate_config(const H5FD_hdfs_fapl_t * fa) +{ + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + HDassert(fa != NULL); + + if ( fa->version != H5FD__CURR_HDFS_FAPL_T_VERSION ) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "Unknown H5FD_hdfs_fapl_t version"); + } + + if ( fa->namenode_port > 65535 ) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "Invalid namenode port number"); + } + if ( fa->namenode_port < 0 ) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "Invalid namenode port number"); + } + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_hdfs_validate_config() */ + + +/*------------------------------------------------------------------------- + * Function: H5Pget_fapl_hdfs + * + * Purpose: Returns information about the hdfs file access property + * list though the function arguments. + * + * Return: Success: Non-negative + * + * Failure: Negative + * + * Programmer: John Mainzer + * 9/10/17 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +herr_t +H5Pget_fapl_hdfs(hid_t fapl_id, + H5FD_hdfs_fapl_t *fa_out) +{ + const H5FD_hdfs_fapl_t *fa = NULL; + H5P_genplist_t *plist = NULL; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_API(FAIL) + H5TRACE2("e", "i*x", fapl_id, fa_out); + +#if HDFS_DEBUG + HDfprintf(stdout, "H5Pget_fapl_hdfs() called.\n"); +#endif + + if (fa_out == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "fa_out is NULL") + } + plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS); + if (plist == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, + "not a file access list") + } + if (H5FD_HDFS != H5P_peek_driver(plist)) { + HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, + "incorrect VFL driver") + } + + fa = (const H5FD_hdfs_fapl_t *)H5P_peek_driver_info(plist); + if (fa == NULL) { + HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, + "bad VFL driver info") + } + + /* 
Copy the hdfs fapl data out */ + HDmemcpy(fa_out, fa, sizeof(H5FD_hdfs_fapl_t)); + +done: + FUNC_LEAVE_API(ret_value) + +} /* H5Pget_fapl_hdfs() */ + + +/*------------------------------------------------------------------------- + * Function: H5FD_hdfs_fapl_get + * + * Purpose: Gets a file access property list which could be used to + * create an identical file. + * + * Return: Success: Ptr to new file access property list value. + * + * Failure: NULL + * + * Programmer: John Mainzer + * 9/8/17 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static void * +H5FD_hdfs_fapl_get(H5FD_t *_file) +{ + H5FD_hdfs_t *file = (H5FD_hdfs_t*)_file; + H5FD_hdfs_fapl_t *fa = NULL; + void *ret_value = NULL; + + FUNC_ENTER_NOAPI_NOINIT + + fa = (H5FD_hdfs_fapl_t *)H5MM_calloc(sizeof(H5FD_hdfs_fapl_t)); + if (fa == NULL) { + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, + "memory allocation failed") + } + + /* Copy the fields of the structure */ + HDmemcpy(fa, &(file->fa), sizeof(H5FD_hdfs_fapl_t)); + + ret_value = fa; + +done: + if (ret_value == NULL && fa != NULL) { + H5MM_xfree(fa); /* clean up on error */ + } + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_hdfs_fapl_get() */ + + +/*------------------------------------------------------------------------- + * Function: H5FD_hdfs_fapl_copy + * + * Purpose: Copies the hdfs-specific file access properties. 
+ * + * Return: Success: Ptr to a new property list + * + * Failure: NULL + * + * Programmer: John Mainzer + * 9/8/17 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static void * +H5FD_hdfs_fapl_copy(const void *_old_fa) +{ + const H5FD_hdfs_fapl_t *old_fa = (const H5FD_hdfs_fapl_t*)_old_fa; + H5FD_hdfs_fapl_t *new_fa = NULL; + void *ret_value = NULL; + + FUNC_ENTER_NOAPI_NOINIT + + new_fa = (H5FD_hdfs_fapl_t *)H5MM_malloc(sizeof(H5FD_hdfs_fapl_t)); + if (new_fa == NULL) { + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, + "memory allocation failed") + } + + HDmemcpy(new_fa, old_fa, sizeof(H5FD_hdfs_fapl_t)); + ret_value = new_fa; + +done: + if (ret_value == NULL && new_fa != NULL) { + H5MM_xfree(new_fa); /* clean up on error */ + } + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_hdfs_fapl_copy() */ + + +/*------------------------------------------------------------------------- + * Function: H5FD_hdfs_fapl_free + * + * Purpose: Frees the hdfs-specific file access properties. + * + * Return: SUCCEED (cannot fail) + * + * Programmer: John Mainzer + * 9/8/17 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_hdfs_fapl_free(void *_fa) +{ + H5FD_hdfs_fapl_t *fa = (H5FD_hdfs_fapl_t*)_fa; + + FUNC_ENTER_NOAPI_NOINIT_NOERR + + HDassert(fa != NULL); /* sanity check */ + + H5MM_xfree(fa); + + FUNC_LEAVE_NOAPI(SUCCEED) + +} /* H5FD_hdfs_fapl_free() */ + +#if HDFS_STATS + +/*---------------------------------------------------------------------------- + * + * Function: hdfs_reset_stats() + * + * Purpose: + * + * Reset the stats collection elements in this virtual file structure. + * + * Clears any set data in stats bins; initializes/zeroes values. + * + * Return: + * + * - SUCCESS: `SUCCEED` + * - FAILURE: `FAIL` + * - Occurs if the file is invalid somehow + * + * Programmer: Jacob Smith + * 2017-12-08 + * + * Changes: None. 
+ * + *---------------------------------------------------------------------------- + */ +static herr_t +hdfs_reset_stats(H5FD_hdfs_t *file) +{ + unsigned i = 0; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + +#if HDFS_DEBUG + HDprintf("hdfs_reset_stats() called\n"); +#endif + + if (file == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "file was null") + } + + for (i = 0; i <= HDFS_STATS_BIN_COUNT; i++) { + file->raw[i].bytes = 0; + file->raw[i].count = 0; + file->raw[i].min = (unsigned long long)HDFS_STATS_STARTING_MIN; + file->raw[i].max = 0; + + file->meta[i].bytes = 0; + file->meta[i].count = 0; + file->meta[i].min = (unsigned long long)HDFS_STATS_STARTING_MIN; + file->meta[i].max = 0; + } + +done: + FUNC_LEAVE_NOAPI(ret_value); + +} /* hdfs_reset_stats */ +#endif /* HDFS_STATS */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_hdfs_open() + * + * Purpose: + * + * Create and/or opens a file as an HDF5 file. + * + * Any flag except H5F_ACC_RDONLY will cause an error. + * + * Return: + * + * Success: A pointer to a new file data structure. + * The public fields will be initialized by the caller, which is + * always H5FD_open(). + * + * Failure: NULL + * + * Programmer: Jacob Smith + * 2017-11-02 + * + * Changes: None. 
+ * + *------------------------------------------------------------------------- + */ +static H5FD_t * +H5FD_hdfs_open( + const char *path, + unsigned flags, + hid_t fapl_id, + haddr_t maxaddr) +{ + H5FD_t *ret_value = NULL; +#ifdef H5_HAVE_LIBHDFS + H5FD_hdfs_t *file = NULL; + hdfs_t *handle = NULL; + H5FD_hdfs_fapl_t fa; +#endif + + FUNC_ENTER_NOAPI_NOINIT + +#ifndef H5_HAVE_LIBHDFS + HGOTO_ERROR(H5E_VFL, H5E_UNSUPPORTED, NULL, + "Illegal open of unsupported virtual file (hdfs)"); +#else +#if HDFS_DEBUG + HDfprintf(stdout, "H5FD_hdfs_open() called.\n"); +#endif /* HDFS_DEBUG */ + + /* Sanity check on file offsets */ + HDcompile_assert(sizeof(HDoff_t) >= sizeof(size_t)); + + /* Check arguments */ + if (!path || !*path) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "invalid file name") + } + if (0 == maxaddr || HADDR_UNDEF == maxaddr) { + HGOTO_ERROR(H5E_ARGS, H5E_BADRANGE, NULL, + "bogus maxaddr") + } + if (ADDR_OVERFLOW(maxaddr)) { + HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, NULL, + "bogus maxaddr") + } + if (flags != H5F_ACC_RDONLY) { + HGOTO_ERROR(H5E_ARGS, H5E_UNSUPPORTED, NULL, + "only Read-Only access allowed") + } + if (fapl_id == H5P_DEFAULT || fapl_id == H5P_FILE_ACCESS_DEFAULT) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "fapl cannot be H5P_DEFAULT") + } + if (FAIL == H5Pget_fapl_hdfs(fapl_id, &fa)) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "can't get property list") + } + + handle = H5FD_hdfs_handle_open( + path, + fa.namenode_name, + fa.namenode_port, + fa.user_name, + fa.kerberos_ticket_cache, + fa.stream_buffer_size); + + if (handle == NULL) { + HGOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, NULL, + "could not open") + } + + HDassert(handle->magic == HDFS_HDFST_MAGIC); + + /* create new file struct + */ + file = H5FL_CALLOC(H5FD_hdfs_t); + if (file == NULL) { + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, + "unable to allocate file struct") + } + file->hdfs_handle = handle; + HDmemcpy(&(file->fa), &fa, sizeof(H5FD_hdfs_fapl_t)); + +#if HDFS_STATS + if 
(FAIL == hdfs_reset_stats(file)) { + HGOTO_ERROR(H5E_INTERNAL, H5E_UNINITIALIZED, NULL, + "unable to reset file statistics") + } +#endif /* HDFS_STATS */ + + ret_value = (H5FD_t*)file; +#endif /* H5_HAVE_LIBHDFS */ + +done: +#ifdef H5_HAVE_LIBHDFS + if (ret_value == NULL) { + if (handle != NULL) { + if (FAIL == H5FD_hdfs_handle_close(handle)) { + HDONE_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, NULL, + "unable to close HDFS file handle") + } + } + if (file != NULL) { + file = H5FL_FREE(H5FD_hdfs_t, file); + } + } /* if null return value (error) */ +#endif /* H5_HAVE_LIBHDFS */ + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_hdfs_open() */ + +#if HDFS_STATS + +/*---------------------------------------------------------------------------- + * + * Function: hdfs_fprint_stats() + * + * Purpose: + * + * Tabulate and pretty-print statistics for this virtual file. + * + * Should be called upon file close. + * + * Shows number of reads and bytes read, broken down by + * "raw" (H5FD_MEM_DRAW) + * or "meta" (any other flag) + * + * Prints filename and listing of total number of reads and bytes read, + * both as a grand total and separate meta- and rawdata reads. + * + * If any reads were done, prints out two tables: + * + * 1. overview of raw- and metadata reads + * - min (smallest size read) + * - average of size read + * - k,M,G suffixes by powers of 1024 (2^10) + * - max (largest size read) + * 2. tabulation of "bins", sepraring reads into exponentially-larger + * ranges of size. + * - columns for number of reads, total bytes, and average size, with + * separate sub-colums for raw- and metadata reads. + * - each row represents one bin, identified by the top of its range + * + * Bin ranges can be modified with pound-defines at the top of this file. + * + * Bins without any reads in their bounds are not printed. + * + * An "overflow" bin is also present, to catch "big" reads. + * + * Output for all bins (and range ceiling and average size report) + * is divied by powers of 1024. 
By corollary, four digits before the decimal + * is valid. + * + * - 41080 bytes is represented by 40.177k, not 41.080k + * - 1004.831M represents approx. 1052642000 bytes + * + * Return: + * + * - SUCCESS: `SUCCEED` + * - FAILURE: `FAIL` + * - occurs if the file passed in is invalid + * - TODO: if stream is invalid? how can we check this? + * + * Programmer: Jacob Smith + * + * Changes: None. + * + *---------------------------------------------------------------------------- + */ +static herr_t +hdfs_fprint_stats( + FILE *stream, + const H5FD_hdfs_t *file) +{ + herr_t ret_value = SUCCEED; + parsed_url_t *purl = NULL; + unsigned i = 0; + unsigned long count_meta = 0; + unsigned long count_raw = 0; + double average_meta = 0.0; + double average_raw = 0.0; + unsigned long long min_meta = (unsigned long long)HDFS_STATS_STARTING_MIN; + unsigned long long min_raw = (unsigned long long)HDFS_STATS_STARTING_MIN; + unsigned long long max_meta = 0; + unsigned long long max_raw = 0; + unsigned long long bytes_raw = 0; + unsigned long long bytes_meta = 0; + double re_dub = 0.0; /* re-usable double variable */ + unsigned suffix_i = 0; + const char suffixes[] = { ' ', 'K', 'M', 'G', 'T', 'P' }; + + FUNC_ENTER_NOAPI_NOINIT + + if (stream == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "file stream cannot be null" ) + } + if (file == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "file cannot be null") + } + if (file->hdfs_handle == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "hdfs handle cannot be null") + } + if (file->hdfs_handle->magic != HDFS_HDFST_MAGIC) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "hdfs handle has invalid magic") + } + + /* TODO: See what libhdfs exposes to us. 
*/ + +#if 0 + if (file->s3r_handle->purl == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "parsed url structure cannot be null") + } + purl = file->s3r_handle->purl; + + /****************** + * PRINT FILENAME * + ******************/ + + HDfprintf(stream, "stats for %s://%s", purl->scheme, purl->host); + if (purl->port != NULL && purl->port[0] != '\0') { + HDfprintf(stream, ":%s", purl->port); + } + if (purl->query != NULL && purl->query[0] != '\0') { + if (purl->path != NULL && purl->path[0] != '\0') { + HDfprintf(stream, "/%s", purl->path); + } else { + HDfprintf(stream, "/"); + } + HDfprintf(stream, "?%s", purl->query); + } else if (purl->path != NULL && purl->path[0] != '\0') { + HDfprintf(stream, "/%s", purl->path); + } + HDfprintf(stream, "\n"); +#endif + + /******************* + * AGGREGATE STATS * + *******************/ + + for (i = 0; i <= HDFS_STATS_BIN_COUNT; i++) { + const hdfs_statsbin *r = &file->raw[i]; + const hdfs_statsbin *m = &file->meta[i]; + + if (m->min < min_meta) min_meta = m->min; + if (r->min < min_raw) min_raw = r->min; + if (m->max > max_meta) max_meta = m->max; + if (r->max > max_raw) max_raw = r->max; + + count_raw += r->count; + count_meta += m->count; + bytes_raw += r->bytes; + bytes_meta += m->bytes; + } + if (count_raw > 0) average_raw = (double)bytes_raw / (double)count_raw; + if (count_meta > 0) average_meta = (double)bytes_meta / (double)count_meta; + + /****************** + * PRINT OVERVIEW * + ******************/ + + HDfprintf(stream, "TOTAL READS: %llu (%llu meta, %llu raw)\n", + count_raw + count_meta, count_meta, count_raw); + HDfprintf(stream, "TOTAL BYTES: %llu (%llu meta, %llu raw)\n", + bytes_raw + bytes_meta, bytes_meta, bytes_raw); + + if (count_raw + count_meta == 0) { + goto done; + } + + /************************* + * PRINT AGGREGATE STATS * + *************************/ + + HDfprintf(stream, "SIZES meta raw\n"); + HDfprintf(stream, " min "); + if (count_meta == 0) { + HDfprintf(stream, " 0.000 "); + } else { 
+ re_dub = (double)min_meta; + for (suffix_i = 0; re_dub >= 1024.0; suffix_i++) { + re_dub /= 1024.0; + } + HDassert(suffix_i < sizeof(suffixes)); + HDfprintf(stream, "%8.3lf%c ", re_dub, suffixes[suffix_i]); + } + + if (count_raw == 0) { + HDfprintf(stream, " 0.000 \n"); + } else { + re_dub = (double)min_raw; + for (suffix_i = 0; re_dub >= 1024.0; suffix_i++) { + re_dub /= 1024.0; + } + HDassert(suffix_i < sizeof(suffixes)); + HDfprintf(stream, "%8.3lf%c\n", re_dub, suffixes[suffix_i]); + } + + HDfprintf(stream, " avg "); + re_dub = (double)average_meta; + for (suffix_i = 0; re_dub >= 1024.0; suffix_i++) { + re_dub /= 1024.0; + } + HDassert(suffix_i < sizeof(suffixes)); + HDfprintf(stream, "%8.3lf%c ", re_dub, suffixes[suffix_i]); + + re_dub = (double)average_raw; + for (suffix_i = 0; re_dub >= 1024.0; suffix_i++) { + re_dub /= 1024.0; + } + HDassert(suffix_i < sizeof(suffixes)); + HDfprintf(stream, "%8.3lf%c\n", re_dub, suffixes[suffix_i]); + + HDfprintf(stream, " max "); + re_dub = (double)max_meta; + for (suffix_i = 0; re_dub >= 1024.0; suffix_i++) { + re_dub /= 1024.0; + } + HDassert(suffix_i < sizeof(suffixes)); + HDfprintf(stream, "%8.3lf%c ", re_dub, suffixes[suffix_i]); + + re_dub = (double)max_raw; + for (suffix_i = 0; re_dub >= 1024.0; suffix_i++) { + re_dub /= 1024.0; + } + HDassert(suffix_i < sizeof(suffixes)); + HDfprintf(stream, "%8.3lf%c\n", re_dub, suffixes[suffix_i]); + + /****************************** + * PRINT INDIVIDUAL BIN STATS * + ******************************/ + + HDfprintf(stream, + "BINS # of reads total bytes average size\n"); + HDfprintf(stream, + " up-to meta raw meta raw meta raw\n"); + + for (i = 0; i <= HDFS_STATS_BIN_COUNT; i++) { + const hdfs_statsbin *m; + const hdfs_statsbin *r; + unsigned long long range_end = 0; + char bm_suffix = ' '; /* bytes-meta */ + double bm_val = 0.0; + char br_suffix = ' '; /* bytes-raw */ + double br_val = 0.0; + char am_suffix = ' '; /* average-meta */ + double am_val = 0.0; + char ar_suffix = ' '; 
/* average-raw */ + double ar_val = 0.0; + + m = &file->meta[i]; + r = &file->raw[i]; + if (r->count == 0 && m->count == 0) { + continue; + } + + range_end = hdfs_stats_boundaries[i]; + + if (i == HDFS_STATS_BIN_COUNT) { + range_end = hdfs_stats_boundaries[i-1]; + HDfprintf(stream, ">"); + } else { + HDfprintf(stream, " "); + } + + bm_val = (double)m->bytes; + for (suffix_i = 0; bm_val >= 1024.0; suffix_i++) { + bm_val /= 1024.0; + } + HDassert(suffix_i < sizeof(suffixes)); + bm_suffix = suffixes[suffix_i]; + + br_val = (double)r->bytes; + for (suffix_i = 0; br_val >= 1024.0; suffix_i++) { + br_val /= 1024.0; + } + HDassert(suffix_i < sizeof(suffixes)); + br_suffix = suffixes[suffix_i]; + + if (m->count > 0) { + am_val = (double)(m->bytes) / (double)(m->count); + } + for (suffix_i = 0; am_val >= 1024.0; suffix_i++) { + am_val /= 1024.0; + } + HDassert(suffix_i < sizeof(suffixes)); + am_suffix = suffixes[suffix_i]; + + if (r->count > 0) { + ar_val = (double)(r->bytes) / (double)(r->count); + } + for (suffix_i = 0; ar_val >= 1024.0; suffix_i++) { + ar_val /= 1024.0; + } + HDassert(suffix_i < sizeof(suffixes)); + ar_suffix = suffixes[suffix_i]; + + re_dub = (double)range_end; + for (suffix_i = 0; re_dub >= 1024.0; suffix_i++) { + re_dub /= 1024.0; + } + HDassert(suffix_i < sizeof(suffixes)); + + HDfprintf( + stream, + " %8.3f%c %7d %7d %8.3f%c %8.3f%c %8.3f%c %8.3f%c\n", + re_dub, suffixes[suffix_i], /* bin ceiling */ + m->count, /* metadata reads */ + r->count, /* rawdata reads */ + bm_val, bm_suffix, /* metadata bytes */ + br_val, br_suffix, /* rawdata bytes */ + am_val, am_suffix, /* metadata average */ + ar_val, ar_suffix); /* rawdata average */ + fflush(stream); + } + +done: + FUNC_LEAVE_NOAPI(ret_value); + +} /* hdfs_fprint_stats */ +#endif /* HDFS_STATS */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_hdfs_close() + * + * Purpose: + * + * Close an HDF5 file. 
+ * + * Return: + * + * SUCCEED/FAIL + * + * Programmer: Jacob Smith + * 2017-11-02 + * + * Changes: None. + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_hdfs_close(H5FD_t *_file) +{ + herr_t ret_value = SUCCEED; +#ifdef H5_HAVE_LIBHDFS + H5FD_hdfs_t *file = (H5FD_hdfs_t *)_file; +#endif + + FUNC_ENTER_NOAPI_NOINIT + +#ifndef H5_HAVE_LIBHDFS + HGOTO_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL, + "Illegal close of unsupported Virtual File (hdfs)") +#else +#if HDFS_DEBUG + HDfprintf(stdout, "H5FD_hdfs_close() called.\n"); +#endif + + /* Sanity checks + */ + HDassert(file != NULL); + HDassert(file->hdfs_handle != NULL); + HDassert(file->hdfs_handle->magic == HDFS_HDFST_MAGIC); + + /* Close the underlying request handle + */ + if (file->hdfs_handle != NULL) { + if (FAIL == H5FD_hdfs_handle_close(file->hdfs_handle)) { + HGOTO_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL, + "unable to close HDFS file handle") + } + } + +#if HDFS_STATS + /* TODO: mechanism to re-target stats printout */ + if (FAIL == hdfs_fprint_stats(stdout, file)) { + HGOTO_ERROR(H5E_INTERNAL, H5E_ERROR, FAIL, + "problem while writing file statistics") + } +#endif /* HDFS_STATS */ + + /* Release the file info + */ + file = H5FL_FREE(H5FD_hdfs_t, file); +#endif /* H5_HAVE_LIBHDFS */ + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* end H5FD_hdfs_close() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_hdfs_cmp() + * + * Purpose: + * + * Compares two files using this driver by their HDFS-provided file info, + * field-by-field. + * + * Return: + * + Equivalent: 0 + * + Not Equivalent: -1 + * + * Programmer: Gerd Herber + * May 2018 + * + * Changes: + * + * + Replace `if (ret_value == 0)` chain with `HGOTO_DONE` jumps. 
+ * Jacob Smith 17 May 2018 + * + *------------------------------------------------------------------------- + */ +static int +H5FD_hdfs_cmp(const H5FD_t *_f1, + const H5FD_t *_f2) +{ + int ret_value = 0; +#ifdef H5_HAVE_LIBHDFS + const H5FD_hdfs_t *f1 = (const H5FD_hdfs_t *)_f1; + const H5FD_hdfs_t *f2 = (const H5FD_hdfs_t *)_f2; + hdfsFileInfo *finfo1 = NULL; + hdfsFileInfo *finfo2 = NULL; +#endif /* H5_HAVE_LIBHDFS */ + + FUNC_ENTER_NOAPI_NOINIT_NOERR + +#ifdef H5_HAVE_LIBHDFS +#if HDFS_DEBUG + HDfprintf(stdout, "H5FD_hdfs_cmp() called.\n"); +#endif /* HDFS_DEBUG */ + + HDassert(f1->hdfs_handle != NULL); + HDassert(f2->hdfs_handle != NULL); + HDassert(f1->hdfs_handle->magic == HDFS_HDFST_MAGIC); + HDassert(f2->hdfs_handle->magic == HDFS_HDFST_MAGIC); + + finfo1 = f1->hdfs_handle->fileinfo; + finfo2 = f2->hdfs_handle->fileinfo; + HDassert(finfo1 != NULL); + HDassert(finfo2 != NULL); + + if (finfo1->mKind != finfo2->mKind) HGOTO_DONE(-1); + if (finfo1->mName != finfo2->mName) HGOTO_DONE(-1); + if (finfo1->mLastMod != finfo2->mLastMod) HGOTO_DONE(-1); + if (finfo1->mSize != finfo2->mSize) HGOTO_DONE(-1); + if (finfo1->mReplication != finfo2->mReplication) HGOTO_DONE(-1); + if (finfo1->mBlockSize != finfo2->mBlockSize) HGOTO_DONE(-1); + if (strcmp(finfo1->mOwner, finfo2->mOwner)) HGOTO_DONE(-1); + if (strcmp(finfo1->mGroup, finfo2->mGroup)) HGOTO_DONE(-1); + if (finfo1->mPermissions != finfo2->mPermissions) HGOTO_DONE(-1); + if (finfo1->mLastAccess != finfo2->mLastAccess) HGOTO_DONE(-1); +#endif /* H5_HAVE_LIBHDFS */ + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_hdfs_cmp() */ + + +/*------------------------------------------------------------------------- + * Function: H5FD_hdfs_query + * + * Purpose: Set the flags that this VFL driver is capable of supporting. + * (listed in H5FDpublic.h) + * + * Note that since the HDFS VFD is read only, most flags + * are irrelevant. + * + * The term "set" is highly misleading... 
+ * stores/copies the supported flags in the out-pointer `flags`. + * + * Return: SUCCEED (Can't fail) + * + * Programmer: John Mainzer + * 9/11/17 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_hdfs_query( + const H5FD_t H5_ATTR_UNUSED *_file, + unsigned long *flags) /* out variable */ +{ + FUNC_ENTER_NOAPI_NOINIT_NOERR + +#if HDFS_DEBUG + HDfprintf(stdout, "H5FD_hdfs_query() called.\n"); +#endif + + if (flags) { + *flags = 0; + *flags |= H5FD_FEAT_DATA_SIEVE; + } + + FUNC_LEAVE_NOAPI(SUCCEED) + +} /* H5FD_hdfs_query() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_hdfs_get_eoa() + * + * Purpose: + * + * Gets the end-of-address marker for the file. The EOA marker + * is the first address past the last byte allocated in the + * format address space. + * + * Return: + * + * The end-of-address marker. + * + * Programmer: Jacob Smith + * 2017-11-02 + * + * Changes: None. + * + *------------------------------------------------------------------------- + */ +static haddr_t +H5FD_hdfs_get_eoa( + const H5FD_t *_file, + H5FD_mem_t H5_ATTR_UNUSED type) +{ +#ifdef H5_HAVE_LIBHDFS + const H5FD_hdfs_t *file = (const H5FD_hdfs_t *)_file; +#endif /* H5_HAVE_LIBHDFS */ + + FUNC_ENTER_NOAPI_NOINIT_NOERR + +#if HDFS_DEBUG + HDfprintf(stdout, "H5FD_hdfs_get_eoa() called.\n"); +#endif + +#ifdef H5_HAVE_LIBHDFS + FUNC_LEAVE_NOAPI(file->eoa) +#else + FUNC_LEAVE_NOAPI(0) +#endif /* H5_HAVE_LIBHDFS */ + +} /* end H5FD_hdfs_get_eoa() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_hdfs_set_eoa() + * + * Purpose: + * + * Set the end-of-address marker for the file. + * + * Return: + * + * SUCCEED (can't fail) + * + * Programmer: Jacob Smith + * 2017-11-03 + * + * Changes: None. 
+ * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_hdfs_set_eoa( + H5FD_t *_file, + H5FD_mem_t H5_ATTR_UNUSED type, + haddr_t addr) +{ +#ifdef H5_HAVE_LIBHDFS + H5FD_hdfs_t *file = (H5FD_hdfs_t *)_file; +#endif /* H5_HAVE_LIBHDFS */ + + FUNC_ENTER_NOAPI_NOINIT_NOERR + +#if HDFS_DEBUG + HDfprintf(stdout, "H5FD_hdfs_set_eoa() called.\n"); +#endif + +#ifdef H5_HAVE_LIBHDFS + file->eoa = addr; + + FUNC_LEAVE_NOAPI(SUCCEED) +#else + FUNC_LEAVE_NOAPI(FAIL) +#endif /* H5_HAVE_LIBHDFS */ + +} /* H5FD_hdfs_set_eoa() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_hdfs_get_eof() + * + * Purpose: + * + * Returns the end-of-file marker. + * + * Return: + * + * EOF: the first address past the end of the "file", either the + * filesystem file or the HDF5 file. + * + * Programmer: Jacob Smith + * 2017-11-02 + * + *------------------------------------------------------------------------- + */ +static haddr_t +H5FD_hdfs_get_eof( + const H5FD_t *_file, + H5FD_mem_t H5_ATTR_UNUSED type) +{ +#ifdef H5_HAVE_LIBHDFS + const H5FD_hdfs_t *file = (const H5FD_hdfs_t *)_file; +#endif /* H5_HAVE_LIBHDFS */ + + FUNC_ENTER_NOAPI_NOINIT_NOERR + +#if HDFS_DEBUG + HDfprintf(stdout, "H5FD_hdfs_get_eof() called.\n"); +#endif + +#ifdef H5_HAVE_LIBHDFS + HDassert(file->hdfs_handle != NULL); + HDassert(file->hdfs_handle->magic == HDFS_HDFST_MAGIC); + + FUNC_LEAVE_NOAPI((size_t) file->hdfs_handle->fileinfo->mSize) +#else + FUNC_LEAVE_NOAPI((size_t)0) +#endif /* H5_HAVE_LIBHDFS */ + +} /* end H5FD_hdfs_get_eof() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_hdfs_get_handle() + * + * Purpose: + * + * Returns the HDFS handle (hdfs_t) of hdfs file driver. + * + * Returns: + * + * SUCCEED/FAIL + * + * Programmer: Jacob Smith + * 2017-11-02 + * + * Changes: None. 
+ * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_hdfs_get_handle( + H5FD_t *_file, + hid_t H5_ATTR_UNUSED fapl, + void **file_handle) +{ + herr_t ret_value = SUCCEED; +#ifdef H5_HAVE_LIBHDFS + H5FD_hdfs_t *file = (H5FD_hdfs_t *)_file; +#endif + + FUNC_ENTER_NOAPI_NOINIT + +#if HDFS_DEBUG + HDfprintf(stdout, "H5FD_hdfs_get_handle() called.\n"); +#endif /* HDFS_DEBUG */ + +#ifdef H5_HAVE_LIBHDFS + if (!file_handle) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "file handle not valid") + } + + *file_handle = file->hdfs_handle; +#else + HGOTO_ERROR(H5E_VFL, H5E_UNSUPPORTED, FAIL, + "Illegal get-handle of unsupported virtual file (hdfs)"); +#endif /* H5_HAVE_LIBHDFS */ + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* end H5FD_hdfs_get_handle() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_hdfs_read() + * + * Purpose: + * + * Reads SIZE bytes of data from FILE beginning at address ADDR + * into buffer BUF according to data transfer properties in DXPL_ID. + * + * Return: + * + * Success: `SUCCEED` + * - Result is stored in caller-supplied buffer BUF. + * Failure: `FAIL` + * - Unable to complete read. + * - Contents of buffer `buf` are undefined. + * + * Programmer: Jacob Smith + * 2017-11-?? + * + * Changes: None. 
+ * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_hdfs_read( + H5FD_t *_file, + H5FD_mem_t H5_ATTR_UNUSED type, + hid_t H5_ATTR_UNUSED dxpl_id, + haddr_t addr, /* start offset */ + size_t size, /* length of read */ + void *buf) /* out */ +{ + herr_t ret_value = SUCCEED; +#if H5_HAVE_LIBHDFS + H5FD_hdfs_t *file = (H5FD_hdfs_t *)_file; + size_t filesize = 0; +#endif /* H5_HAVE_LIBHDFS */ +#if HDFS_STATS + /* working variables for storing stats */ + hdfs_statsbin *bin = NULL; + unsigned bin_i = 0; +#endif /* HDFS_STATS */ + + FUNC_ENTER_NOAPI_NOINIT + +#if HDFS_DEBUG + HDfprintf(stdout, "H5FD_hdfs_read() called.\n"); +#endif /* HDFS_DEBUG */ + +#ifndef H5_HAVE_LIBHDFS + HGOTO_ERROR(H5E_VFL, H5E_UNSUPPORTED, FAIL, + "Illegal get-handle of unsupported virtual file (hdfs)"); +#else + HDassert(file != NULL); + HDassert(file->hdfs_handle != NULL); + HDassert(file->hdfs_handle->magic == HDFS_HDFST_MAGIC); + HDassert(buf != NULL); + + filesize = (size_t) file->hdfs_handle->fileinfo->mSize; + + if ((addr > filesize) || ((addr + size) > filesize)) { + HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL, + "range exceeds file address") + } + + if (FAIL == hdfsPread( + file->hdfs_handle->filesystem, + file->hdfs_handle->file, + (tOffset)addr, + buf, + (tSize)size)) + { + HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, + "unable to execute read") + } + +#if HDFS_STATS + + /* Find which "bin" this read fits in. Can be "overflow" bin. + */ + for (bin_i = 0; bin_i < HDFS_STATS_BIN_COUNT; bin_i++) { + if ((unsigned long long)size < hdfs_stats_boundaries[bin_i]) { + break; + } + } + bin = (type == H5FD_MEM_DRAW) + ? 
&file->raw[bin_i] + : &file->meta[bin_i]; + + /* Store collected stats in appropriate bin + */ + if (bin->count == 0) { + bin->min = size; + bin->max = size; + } else { + if (size < bin->min) bin->min = size; + if (size > bin->max) bin->max = size; + } + bin->count++; + bin->bytes += (unsigned long long)size; + +#endif /* HDFS_STATS */ +#endif /* H5_HAVE_LIBHDFS */ + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* end H5FD_hdfs_read() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_hdfs_write() + * + * Purpose: + * + * Write bytes to file. + * UNSUPPORTED IN READ-ONLY HDFS VFD. + * + * Return: + * + * FAIL (Not possible with Read-Only S3 file.) + * + * Programmer: Jacob Smith + * 2017-10-23 + * + * Changes: None. + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_hdfs_write( + H5FD_t H5_ATTR_UNUSED *_file, + H5FD_mem_t H5_ATTR_UNUSED type, + hid_t H5_ATTR_UNUSED dxpl_id, + haddr_t H5_ATTR_UNUSED addr, + size_t H5_ATTR_UNUSED size, + const void H5_ATTR_UNUSED *buf) +{ + herr_t ret_value = FAIL; + + FUNC_ENTER_NOAPI_NOINIT + +#if HDFS_DEBUG + HDfprintf(stdout, "H5FD_hdfs_write() called.\n"); +#endif + + HGOTO_ERROR(H5E_VFL, H5E_UNSUPPORTED, FAIL, + "cannot write to read-only file.") + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_hdfs_write() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_hdfs_truncate() + * + * Purpose: + * + * Makes sure that the true file size is the same (or larger) + * than the end-of-address. + * + * NOT POSSIBLE ON READ-ONLY S3 FILES. + * + * Return: + * + * FAIL (Not possible on Read-Only S3 files.) + * + * Programmer: Jacob Smith + * 2017-10-23 + * + * Changes: None. 
+ * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_hdfs_truncate( + H5FD_t H5_ATTR_UNUSED *_file, + hid_t H5_ATTR_UNUSED dxpl_id, + hbool_t H5_ATTR_UNUSED closing) +{ + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + +#if HDFS_DEBUG + HDfprintf(stdout, "H5FD_hdfs_truncate() called.\n"); +#endif + + HGOTO_ERROR(H5E_VFL, H5E_UNSUPPORTED, FAIL, + "cannot truncate read-only file.") + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* end H5FD_hdfs_truncate() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_hdfs_lock() + * + * Purpose: + * + * Place an advisory lock on a file. + * No effect on Read-Only S3 file. + * + * Suggestion: remove lock/unlock from class + * > would result in error at H5FD_[un]lock() (H5FD.c) + * + * Return: + * + * SUCCEED (No-op always succeeds) + * + * Programmer: Jacob Smith + * 2017-11-03 + * + * Changes: None. + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_hdfs_lock( + H5FD_t H5_ATTR_UNUSED *_file, + hbool_t H5_ATTR_UNUSED rw) +{ + FUNC_ENTER_NOAPI_NOINIT_NOERR + FUNC_LEAVE_NOAPI(SUCCEED) + +} /* end H5FD_hdfs_lock() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_hdfs_unlock() + * + * Purpose: + * + * Remove the existing lock on the file. + * No effect on Read-Only S3 file. + * + * Return: + * + * SUCCEED (No-op always succeeds) + * + * Programmer: Jacob Smith + * 2017-11-03 + * + * Changes: None. 
+ * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_hdfs_unlock(H5FD_t H5_ATTR_UNUSED *_file) +{ + FUNC_ENTER_NOAPI_NOINIT_NOERR + FUNC_LEAVE_NOAPI(SUCCEED) + +} /* end H5FD_hdfs_unlock() */ + diff --git a/src/H5FDhdfs.h b/src/H5FDhdfs.h new file mode 100644 index 0000000..3d4128d --- /dev/null +++ b/src/H5FDhdfs.h @@ -0,0 +1,122 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Read-Only HDFS Virtual File Driver (VFD) * + * Copyright (c) 2018, The HDF Group. * + * * + * All rights reserved. * + * * + * NOTICE: * + * All information contained herein is, and remains, the property of The HDF * + * Group. The intellectual and technical concepts contained herein are * + * proprietary to The HDF Group. Dissemination of this information or * + * reproduction of this material is strictly forbidden unless prior written * + * permission is obtained from The HDF Group. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* + * Programmer: Jacob Smith + * 2018-04-23 + * + * Purpose: The public header file for the hdfs driver. + */ + +#ifndef H5FDhdfs_H +#define H5FDhdfs_H + +#define H5FD_HDFS (H5FD_hdfs_init()) + +#ifdef __cplusplus +extern "C" { +#endif + +/**************************************************************************** + * + * Structure: H5FD_hdfs_fapl_t + * + * Purpose: + * + * H5FD_hdfs_fapl_t is a public structure that is used to pass + * configuration information to the appropriate HDFS VFD via the FAPL. + * A pointer to an instance of this structure is a parameter to + * H5Pset_fapl_hdfs() and H5Pget_fapl_hdfs(). + * + * + * + * `version` (int32_t) + * + * Version number of the `H5FD_hdfs_fapl_t` structure. Any instance passed + * to the above calls must have a recognized version number, or an error + * will be flagged. + * + * This field should be set to `H5FD__CURR_HDFS_FAPL_T_VERSION`. 
+ * + * `namenode_name` (const char[]) + * + * Name of "Name Node" to access as the HDFS server. + * + * Must not be longer than `H5FD__HDFS_NODE_NAME_SPACE`. + * + * TBD: Can be NULL. + * + * `namenode_port` (int32_t) TBD + * + * Port number to use to connect with Name Node. + * + * TBD: If 0, uses a default port. + * + * `kerberos_ticket_cache` (const char[]) + * + * Path to the location of the Kerberos authentication cache. + * + * Must not be longer than `H5FD__HDFS_KERB_CACHE_PATH_SPACE`. + * + * TBD: Can be NULL. + * + * `user_name` (const char[]) + * + * Username to use when accessing file. + * + * Must not be longer than `H5FD__HDFS_USER_NAME_SPACE`. + * + * TBD: Can be NULL. + * + * `stream_buffer_size` (int32_t) + * + * Size (in bytes) of the file read stream buffer. + * + * TBD: If -1, relies on a default value. + * + * + * + * Programmer: Jacob Smith + * 2018-04-23 + * + * Changes: None + * + ****************************************************************************/ + +#define H5FD__CURR_HDFS_FAPL_T_VERSION 1 + +#define H5FD__HDFS_NODE_NAME_SPACE 128 +#define H5FD__HDFS_USER_NAME_SPACE 128 +#define H5FD__HDFS_KERB_CACHE_PATH_SPACE 128 + +typedef struct H5FD_hdfs_fapl_t { + int32_t version; + char namenode_name[H5FD__HDFS_NODE_NAME_SPACE + 1]; + int32_t namenode_port; + char user_name[H5FD__HDFS_USER_NAME_SPACE + 1]; + char kerberos_ticket_cache[H5FD__HDFS_KERB_CACHE_PATH_SPACE + 1]; + int32_t stream_buffer_size; +} H5FD_hdfs_fapl_t; + +H5_DLL hid_t H5FD_hdfs_init(void); +H5_DLL herr_t H5Pget_fapl_hdfs(hid_t fapl_id, H5FD_hdfs_fapl_t *fa_out); +H5_DLL herr_t H5Pset_fapl_hdfs(hid_t fapl_id, H5FD_hdfs_fapl_t *fa); + +#ifdef __cplusplus +} +#endif + +#endif /* ifndef H5FDhdfs_H */ + + diff --git a/src/H5FDros3.c b/src/H5FDros3.c new file mode 100644 index 0000000..8bf0420 --- /dev/null +++ b/src/H5FDros3.c @@ -0,0 +1,1847 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Read-Only S3 Virtual File Driver (VFD) * + * 
Copyright (c) 2017-2018, The HDF Group. * + * * + * All rights reserved. * + * * + * NOTICE: * + * All information contained herein is, and remains, the property of The HDF * + * Group. The intellectual and technical concepts contained herein are * + * proprietary to The HDF Group. Dissemination of this information or * + * reproduction of this material is strictly forbidden unless prior written * + * permission is obtained from The HDF Group. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* + * Programmer: Jacob Smith <jake.smith@hdfgroup.org> + * 2017-10-13 + * + * Purpose: + * + * Provide read-only access to files hosted on Amazon's S3 service. + * Relies on "s3comms" utility layer to implement the AWS REST API. + */ + +/* This source code file is part of the H5FD driver module */ +#include "H5FDdrvr_module.h" + +#include "H5private.h" /* Generic Functions */ +#include "H5Eprivate.h" /* Error handling */ +#include "H5FDprivate.h" /* File drivers */ +#include "H5FDros3.h" /* ros3 file driver */ +#include "H5FLprivate.h" /* Free Lists */ +#include "H5Iprivate.h" /* IDs */ +#include "H5MMprivate.h" /* Memory management */ +#include "H5FDs3comms.h" /* S3 Communications */ + +/* toggle function call prints: 1 turns on + */ +#define ROS3_DEBUG 0 + +/* toggle stats collection and reporting + */ +#define ROS3_STATS 0 + +/* The driver identification number, initialized at runtime + */ +static hid_t H5FD_ROS3_g = 0; + +#if ROS3_STATS + +/* arbitrarily large value, such that any reasonable size read will be "less" + * than this value and set a true minimum + * not 0 because that may be a valid recorded minimum in degenerate cases + */ +#define ROS3_STATS_STARTING_MIN 0xfffffffful + +/* Configuration definitions for stats collection and breakdown + * + * 2^10 = 1024 + * Reads up to 1024 bytes (1 kB) fall in bin 0 + * 2^(10+(1*16)) = 2^26 = 64MB + * Reads of 64MB or greater fall in "overflow" bin[BIN_COUNT] + */ +#define 
ROS3_STATS_BASE 2 +#define ROS3_STATS_INTERVAL 1 +#define ROS3_STATS_START_POWER 10 +#define ROS3_STATS_BIN_COUNT 16 /* MUST BE GREATER THAN 0 */ + + +/* + * Calculate `BASE ^ (START_POWER + (INTERVAL * bin_i))` + * Stores result at `(unsigned long long *) out_ptr`. + * Used in computing boundaries between stats bins. + */ +#define ROS3_STATS_POW(bin_i, out_ptr) { \ + unsigned long long donotshadowresult = 1; \ + unsigned donotshadowindex = 0; \ + for (donotshadowindex = 0; \ + donotshadowindex < (((bin_i) * ROS3_STATS_INTERVAL) + \ + ROS3_STATS_START_POWER); \ + donotshadowindex++) \ + { \ + donotshadowresult *= ROS3_STATS_BASE; \ + } \ + *(out_ptr) = donotshadowresult; \ +} + +/* array to hold pre-computed boundaries for stats bins + */ +static unsigned long long ros3_stats_boundaries[ROS3_STATS_BIN_COUNT]; + +/*************************************************************************** + * + * Structure: ros3_statsbin + * + * Purpose: + * + * Structure for storing per-file ros3 VFD usage statistics. + * + * + * + * `count` (unsigned long long) + * + * Number of reads with size in this bin's range. + * + * `bytes` (unsigned long long) + * + * Total number of bytes read through this bin. + * + * `min` (unsigned long long) + * + * Smallest read size in this bin. + * + * `max` (unsigned long long) + * + * Largest read size in this bin. + * + * + * + * Programmer: Jacob Smith + * + * Changes: None + * + ***************************************************************************/ +typedef struct { + unsigned long long count; + unsigned long long bytes; + unsigned long long min; + unsigned long long max; +} ros3_statsbin; + +#endif /* ROS3_STATS */ + +/*************************************************************************** + * + * Structure: H5FD_ros3_t + * + * Purpose: + * + * H5FD_ros3_t is a structure used to store all information needed to + * maintain R/O access to a single HDF5 file that has been stored as a + * S3 object. 
This structure is created when such a file is "opened" and + * discarded when it is "closed". + * + * Presents an S3 object as a file to the HDF5 library. + * + * + * + * `pub` (H5FD_t) + * + * Instance of H5FD_t which contains all fields common to all VFDs. + * It must be the first item in this structure, since at higher levels, + * this structure will be treated as an instance of H5FD_t. + * + * `fa` (H5FD_ros3_fapl_t) + * + * Instance of `H5FD_ros3_fapl_t` containing the S3 configuration data + * needed to "open" the HDF5 file. + * + * `eoa` (haddr_t) + * + * End of addressed space in file. After open, it should always + * equal the file size. + * + * `s3r_handle` (s3r_t *) + * + * Instance of S3 Request handle associated with the target resource. + * Responsible for communicating with remote host and presenting file + * contents as indistinguishable from a file on the local filesystem. + * + * *** present only if ROS3_SATS is flagged to enable stats collection *** + * + * `meta` (ros3_statsbin[]) + * `raw` (ros3_statsbin[]) + * + * Only present if ros3 stats collection is enabled. + * + * Arrays of `ros3_statsbin` structures to record raw- and metadata reads. + * + * Records count and size of reads performed by the VFD, and is used to + * print formatted usage statistics to stdout upon VFD shutdown. + * + * Reads of each raw- and metadata type are recorded in an individual bin + * determined by the size of the read. The last bin of each type is + * reserved for "big" reads, with no defined upper bound. + * + * *** end ROS3_STATS *** + * + * + * + * Programmer: Jacob Smith + * + * Changes: None. 
+ * + ***************************************************************************/ +typedef struct H5FD_ros3_t { + H5FD_t pub; + H5FD_ros3_fapl_t fa; + haddr_t eoa; + s3r_t *s3r_handle; +#if ROS3_STATS + ros3_statsbin meta[ROS3_STATS_BIN_COUNT + 1]; + ros3_statsbin raw[ROS3_STATS_BIN_COUNT + 1]; +#endif +} H5FD_ros3_t; + +/* + * These macros check for overflow of various quantities. These macros + * assume that HDoff_t is signed and haddr_t and size_t are unsigned. + * + * ADDR_OVERFLOW: Checks whether a file address of type `haddr_t' + * is too large to be represented by the second argument + * of the file seek function. + * + */ +#define MAXADDR (((haddr_t)1<<(8*sizeof(HDoff_t)-1))-1) +#define ADDR_OVERFLOW(A) (HADDR_UNDEF==(A) || ((A) & ~(haddr_t)MAXADDR)) + +/* Prototypes */ +static herr_t H5FD_ros3_term(void); +static void *H5FD_ros3_fapl_get(H5FD_t *_file); +static void *H5FD_ros3_fapl_copy(const void *_old_fa); +static herr_t H5FD_ros3_fapl_free(void *_fa); +static H5FD_t *H5FD_ros3_open(const char *name, unsigned flags, hid_t fapl_id, + haddr_t maxaddr); +static herr_t H5FD_ros3_close(H5FD_t *_file); +static int H5FD_ros3_cmp(const H5FD_t *_f1, const H5FD_t *_f2); +static herr_t H5FD_ros3_query(const H5FD_t *_f1, unsigned long *flags); +static haddr_t H5FD_ros3_get_eoa(const H5FD_t *_file, H5FD_mem_t type); +static herr_t H5FD_ros3_set_eoa(H5FD_t *_file, H5FD_mem_t type, haddr_t addr); +static haddr_t H5FD_ros3_get_eof(const H5FD_t *_file, H5FD_mem_t type); +static herr_t H5FD_ros3_get_handle(H5FD_t *_file, hid_t fapl, + void** file_handle); +static herr_t H5FD_ros3_read(H5FD_t *_file, H5FD_mem_t type, hid_t fapl_id, + haddr_t addr, size_t size, void *buf); +static herr_t H5FD_ros3_write(H5FD_t *_file, H5FD_mem_t type, hid_t fapl_id, + haddr_t addr, size_t size, const void *buf); +static herr_t H5FD_ros3_truncate(H5FD_t *_file, hid_t dxpl_id, + hbool_t closing); +static herr_t H5FD_ros3_lock(H5FD_t *_file, hbool_t rw); +static herr_t H5FD_ros3_unlock(H5FD_t 
*_file); +static herr_t H5FD_ros3_validate_config(const H5FD_ros3_fapl_t * fa); + +static const H5FD_class_t H5FD_ros3_g = { + "ros3", /* name */ + MAXADDR, /* maxaddr */ + H5F_CLOSE_WEAK, /* fc_degree */ + H5FD_ros3_term, /* terminate */ + NULL, /* sb_size */ + NULL, /* sb_encode */ + NULL, /* sb_decode */ + sizeof(H5FD_ros3_fapl_t), /* fapl_size */ + H5FD_ros3_fapl_get, /* fapl_get */ + H5FD_ros3_fapl_copy, /* fapl_copy */ + H5FD_ros3_fapl_free, /* fapl_free */ + 0, /* dxpl_size */ + NULL, /* dxpl_copy */ + NULL, /* dxpl_free */ + H5FD_ros3_open, /* open */ + H5FD_ros3_close, /* close */ + H5FD_ros3_cmp, /* cmp */ + H5FD_ros3_query, /* query */ + NULL, /* get_type_map */ + NULL, /* alloc */ + NULL, /* free */ + H5FD_ros3_get_eoa, /* get_eoa */ + H5FD_ros3_set_eoa, /* set_eoa */ + H5FD_ros3_get_eof, /* get_eof */ + H5FD_ros3_get_handle, /* get_handle */ + H5FD_ros3_read, /* read */ + H5FD_ros3_write, /* write */ + NULL, /* flush */ + H5FD_ros3_truncate, /* truncate */ + H5FD_ros3_lock, /* lock */ + H5FD_ros3_unlock, /* unlock */ + H5FD_FLMAP_DICHOTOMY /* fl_map */ +}; + +/* Declare a free list to manage the H5FD_ros3_t struct */ +H5FL_DEFINE_STATIC(H5FD_ros3_t); + + +/*------------------------------------------------------------------------- + * Function: H5FD__init_package + * + * Purpose: Initializes any interface-specific data or routines. + * + * Return: Non-negative on success/Negative on failure + * + * Changes: Rename as appropriate for ros3 vfd. 
+ * Jacob Smith 2017 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD__init_package(void) +{ + herr_t ret_value = SUCCEED; + + FUNC_ENTER_STATIC + + if (H5FD_ros3_init() < 0) { + HGOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, + "unable to initialize ros3 VFD") + } + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD__init_package() */ + + +/*------------------------------------------------------------------------- + * Function: H5FD_ros3_init + * + * Purpose: Initialize this driver by registering the driver with the + * library. + * + * Return: Success: The driver ID for the ros3 driver. + * Failure: Negative + * + * Programmer: Robb Matzke + * Thursday, July 29, 1999 + * + * Changes: Rename as appropriate for ros3 vfd. + * Jacob Smith 2017 + * + *------------------------------------------------------------------------- + */ +hid_t +H5FD_ros3_init(void) +{ + hid_t ret_value = H5I_INVALID_HID; /* Return value */ + + FUNC_ENTER_NOAPI(FAIL) + +#if ROS3_DEBUG + HDfprintf(stdout, "H5FD_ros3_init() called.\n"); +#endif + + if (H5I_VFL != H5I_get_type(H5FD_ROS3_g)) + H5FD_ROS3_g = H5FD_register(&H5FD_ros3_g, sizeof(H5FD_class_t), FALSE); + +#if ROS3_STATS + /* pre-compute statsbin boundaries + */ + for (unsigned bin_i = 0; bin_i < ROS3_STATS_BIN_COUNT; bin_i++) { + unsigned long long value = 0; + ROS3_STATS_POW(bin_i, &value) + ros3_stats_boundaries[bin_i] = value; + } +#endif + + /* Set return value */ + ret_value = H5FD_ROS3_g; + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* end H5FD_ros3_init() */ + + +/*--------------------------------------------------------------------------- + * Function: H5FD_ros3_term + * + * Purpose: Shut down the VFD + * + * Returns: SUCCEED (Can't fail) + * + * Programmer: Quincey Koziol + * Friday, Jan 30, 2004 + * + * Changes: Rename as appropriate for ros3 vfd. 
+ * Jacob Smith 2017 + * + *--------------------------------------------------------------------------- + */ +static herr_t +H5FD_ros3_term(void) +{ + FUNC_ENTER_NOAPI_NOINIT_NOERR + +#if ROS3_DEBUG + HDfprintf(stdout, "H5FD_ros3_term() called.\n"); +#endif + + /* Reset VFL ID */ + H5FD_ROS3_g = 0; + + FUNC_LEAVE_NOAPI(SUCCEED) +} /* end H5FD_ros3_term() */ + + +/*------------------------------------------------------------------------- + * Function: H5Pset_fapl_ros3 + * + * Purpose: Modify the file access property list to use the H5FD_ROS3 + * driver defined in this source file. All driver specfic + * properties are passed in as a pointer to a suitably + * initialized instance of H5FD_ros3_fapl_t + * + * Return: SUCCEED/FAIL + * + * Programmer: John Mainzer + * 9/10/17 + * + *------------------------------------------------------------------------- + */ +herr_t +H5Pset_fapl_ros3(hid_t fapl_id, + H5FD_ros3_fapl_t *fa) +{ + H5P_genplist_t *plist = NULL; /* Property list pointer */ + herr_t ret_value = FAIL; + + + + FUNC_ENTER_API(FAIL) + H5TRACE2("e", "i*x", fapl_id, fa); + + HDassert(fa != NULL); + +#if ROS3_DEBUG + HDfprintf(stdout, "H5Pset_fapl_ros3() called.\n"); +#endif + + plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS); + if (plist == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, \ + "not a file access property list") + } + + if (FAIL == H5FD_ros3_validate_config(fa)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid ros3 config") + + ret_value = H5P_set_driver(plist, H5FD_ROS3, (void *)fa); + +done: + + FUNC_LEAVE_API(ret_value) + +} /* H5Pset_fapl_ros3() */ + + +/*------------------------------------------------------------------------- + * Function: H5FD_ros3_validate_config() + * + * Purpose: Test to see if the supplied instance of H5FD_ros3_fapl_t + * contains internally consistant data. Return SUCCEED if so, + * and FAIL otherwise. + * + * Note the difference between internally consistant and + * correct. 
As we will have to try to access the target + * object to determine whether the supplied data is correct, + * we will settle for internal consistancy at this point + * + * Return: SUCCEED if instance of H5FD_ros3_fapl_t contains internally + * consistant data, FAIL otherwise. + * + * Programmer: Jacob Smith + * 9/10/17 + * + * Changes: Add checks for authenticate flag requring populated + * `aws_region` and `secret_id` strings. + * -- Jacob Smith 2017-11-01 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_ros3_validate_config(const H5FD_ros3_fapl_t * fa) +{ + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + + HDassert(fa != NULL); + + if ( fa->version != H5FD__CURR_ROS3_FAPL_T_VERSION ) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "Unknown H5FD_ros3_fapl_t version"); + } + + /* if set to authenticate, region and id cannot be empty strings + */ + if (fa->authenticate == TRUE) { + if ((fa->aws_region[0] == '\0') || + (fa->secret_id[0] == '\0')) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "Inconsistent authentication information"); + } + } + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_ros3_validate_config() */ + + +/*------------------------------------------------------------------------- + * Function: H5Pget_fapl_ros3 + * + * Purpose: Returns information about the ros3 file access property + * list though the function arguments. 
+ * + * Return: Success: Non-negative + * + * Failure: Negative + * + * Programmer: John Mainzer + * 9/10/17 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +herr_t +H5Pget_fapl_ros3(hid_t fapl_id, + H5FD_ros3_fapl_t *fa_out) +{ + const H5FD_ros3_fapl_t *fa; + H5P_genplist_t *plist = NULL; /* Property list pointer */ + herr_t ret_value = SUCCEED; /* Return value */ + + FUNC_ENTER_API(FAIL) + H5TRACE2("e", "i*x", fapl_id, fa_out); + +#if ROS3_DEBUG + HDfprintf(stdout, "H5Pget_fapl_ros3() called.\n"); +#endif + + if (fa_out == NULL) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "fa_out is NULL") + + plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS); + if (plist == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access list") + } + + if (H5FD_ROS3 != H5P_peek_driver(plist)) { + HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "incorrect VFL driver") + } + + fa = (const H5FD_ros3_fapl_t *)H5P_peek_driver_info(plist); + if (fa == NULL) { + HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "bad VFL driver info") + } + + /* Copy the ros3 fapl data out */ + HDmemcpy(fa_out, fa, sizeof(H5FD_ros3_fapl_t)); + +done: + FUNC_LEAVE_API(ret_value) + +} /* H5Pget_fapl_ros3() */ + + +/*------------------------------------------------------------------------- + * Function: H5FD_ros3_fapl_get + * + * Purpose: Gets a file access property list which could be used to + * create an identical file. + * + * Return: Success: Ptr to new file access property list value. 
+ * + * Failure: NULL + * + * Programmer: John Mainzer + * 9/8/17 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static void * +H5FD_ros3_fapl_get(H5FD_t *_file) +{ + H5FD_ros3_t *file = (H5FD_ros3_t*)_file; + H5FD_ros3_fapl_t *fa = NULL; + void *ret_value = NULL; + + FUNC_ENTER_NOAPI_NOINIT + + fa = (H5FD_ros3_fapl_t *)H5MM_calloc(sizeof(H5FD_ros3_fapl_t)); + if (fa == NULL) { + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, + "memory allocation failed") + } + + /* Copy the fields of the structure */ + HDmemcpy(fa, &(file->fa), sizeof(H5FD_ros3_fapl_t)); + + /* Set return value */ + ret_value = fa; + +done: + if (ret_value == NULL) { + if (fa != NULL) + H5MM_xfree(fa); + } + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_ros3_fapl_get() */ + + +/*------------------------------------------------------------------------- + * Function: H5FD_ros3_fapl_copy + * + * Purpose: Copies the ros3-specific file access properties. + * + * Return: Success: Ptr to a new property list + * + * Failure: NULL + * + * Programmer: John Mainzer + * 9/8/17 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static void * +H5FD_ros3_fapl_copy(const void *_old_fa) +{ + const H5FD_ros3_fapl_t *old_fa = (const H5FD_ros3_fapl_t*)_old_fa; + H5FD_ros3_fapl_t *new_fa = NULL; + void *ret_value = NULL; /* Return value */ + + FUNC_ENTER_NOAPI_NOINIT + + new_fa = (H5FD_ros3_fapl_t *)H5MM_malloc(sizeof(H5FD_ros3_fapl_t)); + if (new_fa == NULL) { + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, + "memory allocation failed"); + } + + HDmemcpy(new_fa, old_fa, sizeof(H5FD_ros3_fapl_t)); + ret_value = new_fa; + +done: + if (ret_value == NULL) { + if (new_fa != NULL) + H5MM_xfree(new_fa); + } + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_ros3_fapl_copy() */ + + +/*------------------------------------------------------------------------- + * Function: H5FD_ros3_fapl_free + * + * Purpose: Frees the 
ros3-specific file access properties. + * + * Return: SUCCEED (cannot fail) + * + * Programmer: John Mainzer + * 9/8/17 + * + * Modifications: + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_ros3_fapl_free(void *_fa) +{ + H5FD_ros3_fapl_t *fa = (H5FD_ros3_fapl_t*)_fa; + + FUNC_ENTER_NOAPI_NOINIT_NOERR + + HDassert(fa != NULL); /* sanity check */ + + H5MM_xfree(fa); + + FUNC_LEAVE_NOAPI(SUCCEED) + +} /* H5FD_ros3_fapl_free() */ + +#if ROS3_STATS + +/*---------------------------------------------------------------------------- + * + * Function: ros3_reset_stats() + * + * Purpose: + * + * Reset the stats collection elements in this virtual file structure. + * + * Clears any set data in stats bins; initializes/zeroes values. + * + * Return: + * + * - SUCCESS: `SUCCEED` + * - FAILURE: `FAIL` + * - Occurs if the file is invalid somehow + * + * Programmer: Jacob Smith + * 2017-12-08 + * + * Changes: None. + * + *---------------------------------------------------------------------------- + */ +static herr_t +ros3_reset_stats(H5FD_ros3_t *file) +{ + unsigned i = 0; + herr_t ret_value = SUCCEED; + + + FUNC_ENTER_NOAPI_NOINIT + +#if ROS3_DEBUG + HDprintf("ros3_reset_stats() called\n"); +#endif + + if (file == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "file was null"); + } + + for (i = 0; i <= ROS3_STATS_BIN_COUNT; i++) { + file->raw[i].bytes = 0; + file->raw[i].count = 0; + file->raw[i].min = (unsigned long long)ROS3_STATS_STARTING_MIN; + file->raw[i].max = 0; + + file->meta[i].bytes = 0; + file->meta[i].count = 0; + file->meta[i].min = (unsigned long long)ROS3_STATS_STARTING_MIN; + file->meta[i].max = 0; + } + +done: + FUNC_LEAVE_NOAPI(ret_value); + +} /* ros3_reset_stats */ +#endif /* ROS3_STATS */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_ros3_open() + * + * Purpose: + * + * Create and/or opens a file as an HDF5 file. 
+ * + * Any flag except H5F_ACC_RDONLY will cause an error. + * + * Name (as received from `H5FD_open()`) must conform to web url: + * NAME :: HTTP "://" DOMAIN [PORT] ["/" [URI] [QUERY] ] + * HTTP :: "http" [ "s" ] + * DOMAIN :: e.g., "mybucket.host.org" + * PORT :: ":" <number> (e.g., ":9000" ) + * URI :: <string> (e.g., "path/to/resource.hd5" ) + * QUERY :: "?" <string> (e.g., "arg1=param1&arg2=param2") + * + * Return: + * + * Success: A pointer to a new file data structure. + * The public fields will be initialized by the caller, which is + * always H5FD_open(). + * + * Failure: NULL + * + * Programmer: Jacob Smith + * 2017-11-02 + * + * Changes: None. + * + *------------------------------------------------------------------------- + */ +static H5FD_t * +H5FD_ros3_open(const char *url, + unsigned flags, + hid_t fapl_id, + haddr_t maxaddr) +{ +#ifdef H5_HAVE_ROS3_VFD + H5FD_ros3_t *file = NULL; + struct tm *now = NULL; + char iso8601now[ISO8601_SIZE]; + unsigned char signing_key[SHA256_DIGEST_LENGTH]; + s3r_t *handle = NULL; + H5FD_ros3_fapl_t fa; +#endif + H5FD_t *ret_value = NULL; + + + + FUNC_ENTER_NOAPI_NOINIT +#ifdef H5_HAVE_ROS3_VFD + +#if ROS3_DEBUG + HDfprintf(stdout, "H5FD_ros3_open() called.\n"); +#endif + + + /* Sanity check on file offsets */ + HDcompile_assert(sizeof(HDoff_t) >= sizeof(size_t)); + + /* Check arguments */ + if (!url || !*url) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, "invalid file name") + if (0 == maxaddr || HADDR_UNDEF == maxaddr) + HGOTO_ERROR(H5E_ARGS, H5E_BADRANGE, NULL, "bogus maxaddr") + if (ADDR_OVERFLOW(maxaddr)) + HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, NULL, "bogus maxaddr") + if (flags != H5F_ACC_RDONLY) + HGOTO_ERROR(H5E_ARGS, H5E_UNSUPPORTED, NULL, + "only Read-Only access allowed") + + if (FAIL == H5Pget_fapl_ros3(fapl_id, &fa)) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, "can't get property list") + } + + if (CURLE_OK != curl_global_init(CURL_GLOBAL_DEFAULT)) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "unable to 
initialize curl global (placeholder flags)") + } + + /* open file; procedure depends on whether or not the fapl instructs to + * authenticate requests or not. + */ + if (fa.authenticate == TRUE) { + /* compute signing key (part of AWS/S3 REST API) + * can be re-used by user/key for 7 days after creation. + * find way to re-use/share + */ + now = gmnow(); + HDassert( now != NULL ); + if (ISO8601NOW(iso8601now, now) != (ISO8601_SIZE - 1)) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "problem while writing iso8601 timestamp") + } + if (FAIL == H5FD_s3comms_signing_key(signing_key, + (const char *)fa.secret_key, + (const char *)fa.aws_region, + (const char *)iso8601now) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "problem while computing signing key") + } + + handle = H5FD_s3comms_s3r_open( + url, + (const char *)fa.aws_region, + (const char *)fa.secret_id, + (const unsigned char *)signing_key); + } else { + handle = H5FD_s3comms_s3r_open(url, NULL, NULL, NULL); + } /* if/else should authenticate */ + + if (handle == NULL) { + /* If we want to check CURL's say on the matter in a controlled + * fashion, this is the place to do it, but would need to make a + * few minor changes to s3comms `s3r_t` and `s3r_read()`. 
+ */ + HGOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, NULL, "could not open"); + } + + /* create new file struct + */ + file = H5FL_CALLOC(H5FD_ros3_t); + if (file == NULL) { + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL, + "unable to allocate file struct") + } + + file->s3r_handle = handle; + HDmemcpy(&(file->fa), &fa, sizeof(H5FD_ros3_fapl_t)); + +#if ROS3_STATS + if (FAIL == ros3_reset_stats(file)) { + HGOTO_ERROR(H5E_INTERNAL, H5E_UNINITIALIZED, NULL, + "unable to reset file statistics") + } +#endif /* ROS3_STATS */ + + ret_value = (H5FD_t*)file; + +done: + if (ret_value == NULL) { + if (handle != NULL) { + if (FAIL == H5FD_s3comms_s3r_close(handle)) { + HDONE_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, NULL, + "unable to close s3 file handle") + } + } + if (file != NULL) { + file = H5FL_FREE(H5FD_ros3_t, file); + } + curl_global_cleanup(); /* early cleanup because open failed */ + } /* if null return value (error) */ +#endif /* H5_HAVE_ROS3_VFD */ + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_ros3_open() */ + +#if ROS3_STATS + +/*---------------------------------------------------------------------------- + * + * Function: ros3_fprint_stats() + * + * Purpose: + * + * Tabulate and pretty-print statistics for this virtual file. + * + * Should be called upon file close. + * + * Shows number of reads and bytes read, broken down by + * "raw" (H5FD_MEM_DRAW) + * or "meta" (any other flag) + * + * Prints filename and listing of total number of reads and bytes read, + * both as a grand total and separate meta- and rawdata reads. + * + * If any reads were done, prints out two tables: + * + * 1. overview of raw- and metadata reads + * - min (smallest size read) + * - average of size read + * - k,M,G suffixes by powers of 1024 (2^10) + * - max (largest size read) + * 2. tabulation of "bins", sepraring reads into exponentially-larger + * ranges of size. + * - columns for number of reads, total bytes, and average size, with + * separate sub-colums for raw- and metadata reads. 
+ * - each row represents one bin, identified by the top of its range + * + * Bin ranges can be modified with pound-defines at the top of this file. + * + * Bins without any reads in their bounds are not printed. + * + * An "overflow" bin is also present, to catch "big" reads. + * + * Output for all bins (and range ceiling and average size report) + * is divied by powers of 1024. By corollary, four digits before the decimal + * is valid. + * + * - 41080 bytes is represented by 40.177k, not 41.080k + * - 1004.831M represents approx. 1052642000 bytes + * + * Return: + * + * - SUCCESS: `SUCCEED` + * - FAILURE: `FAIL` + * - occurs if the file passed in is invalid + * - TODO: if stream is invalid? how can we check this? + * + * Programmer: Jacob Smith + * + * Changes: None. + * + *---------------------------------------------------------------------------- + */ +static herr_t +ros3_fprint_stats(FILE *stream, + const H5FD_ros3_t *file) +{ + herr_t ret_value = SUCCEED; + parsed_url_t *purl = NULL; + unsigned i = 0; + unsigned long count_meta = 0; + unsigned long count_raw = 0; + double average_meta = 0.0; + double average_raw = 0.0; + unsigned long long min_meta = (unsigned long long)ROS3_STATS_STARTING_MIN; + unsigned long long min_raw = (unsigned long long)ROS3_STATS_STARTING_MIN; + unsigned long long max_meta = 0; + unsigned long long max_raw = 0; + unsigned long long bytes_raw = 0; + unsigned long long bytes_meta = 0; + double re_dub = 0.0; /* re-usable double variable */ + unsigned suffix_i = 0; + const char suffixes[] = { ' ', 'K', 'M', 'G', 'T', 'P' }; + + + + FUNC_ENTER_NOAPI_NOINIT + + if (stream == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "file stream cannot be null" ); + } + if (file == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "file cannot be null"); + } + if (file->s3r_handle == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "s3 request handle cannot be null"); + } + if (file->s3r_handle->purl == NULL) { + HGOTO_ERROR(H5E_ARGS, 
H5E_BADVALUE, FAIL, + "parsed url structure cannot be null"); + } + purl = file->s3r_handle->purl; + + /****************** + * PRINT FILENAME * + ******************/ + + HDfprintf(stream, "stats for %s://%s", purl->scheme, purl->host); + if (purl->port != NULL && purl->port[0] != '\0') + HDfprintf(stream, ":%s", purl->port); + if (purl->query != NULL && purl->query[0] != '\0') { + if (purl->path != NULL && purl->path[0] != '\0') + HDfprintf(stream, "/%s", purl->path); + else + HDfprintf(stream, "/"); + HDfprintf(stream, "?%s", purl->query); + } else if (purl->path != NULL && purl->path[0] != '\0') { + HDfprintf(stream, "/%s", purl->path); + } + HDfprintf(stream, "\n"); + + /******************* + * AGGREGATE STATS * + *******************/ + + for (i = 0; i <= ROS3_STATS_BIN_COUNT; i++) { + const ros3_statsbin *r = &file->raw[i]; + const ros3_statsbin *m = &file->meta[i]; + + if (m->min < min_meta) min_meta = m->min; + if (r->min < min_raw) min_raw = r->min; + if (m->max > max_meta) max_meta = m->max; + if (r->max > max_raw) max_raw = r->max; + + count_raw += r->count; + count_meta += m->count; + bytes_raw += r->bytes; + bytes_meta += m->bytes; + } + if (count_raw > 0) + average_raw = (double)bytes_raw / (double)count_raw; + if (count_meta > 0) + average_meta = (double)bytes_meta / (double)count_meta; + + /****************** + * PRINT OVERVIEW * + ******************/ + + HDfprintf(stream, "TOTAL READS: %llu (%llu meta, %llu raw)\n", + count_raw + count_meta, count_meta, count_raw); + HDfprintf(stream, "TOTAL BYTES: %llu (%llu meta, %llu raw)\n", + bytes_raw + bytes_meta, bytes_meta, bytes_raw); + + if (count_raw + count_meta == 0) + goto done; + + /************************* + * PRINT AGGREGATE STATS * + *************************/ + + HDfprintf(stream, "SIZES meta raw\n"); + HDfprintf(stream, " min "); + if (count_meta == 0) { + HDfprintf(stream, " 0.000 "); + } else { + re_dub = (double)min_meta; + for (suffix_i = 0; re_dub >= 1024.0; suffix_i++) + re_dub /= 1024.0; 
+ HDassert(suffix_i < sizeof(suffixes)); + HDfprintf(stream, "%8.3lf%c ", re_dub, suffixes[suffix_i]); + } + + if (count_raw == 0) { + HDfprintf(stream, " 0.000 \n"); + } else { + re_dub = (double)min_raw; + for (suffix_i = 0; re_dub >= 1024.0; suffix_i++) + re_dub /= 1024.0; + HDassert(suffix_i < sizeof(suffixes)); + HDfprintf(stream, "%8.3lf%c\n", re_dub, suffixes[suffix_i]); + } + + HDfprintf(stream, " avg "); + re_dub = (double)average_meta; + for (suffix_i = 0; re_dub >= 1024.0; suffix_i++) + re_dub /= 1024.0; + HDassert(suffix_i < sizeof(suffixes)); + HDfprintf(stream, "%8.3lf%c ", re_dub, suffixes[suffix_i]); + + re_dub = (double)average_raw; + for (suffix_i = 0; re_dub >= 1024.0; suffix_i++) + re_dub /= 1024.0; + HDassert(suffix_i < sizeof(suffixes)); + HDfprintf(stream, "%8.3lf%c\n", re_dub, suffixes[suffix_i]); + + HDfprintf(stream, " max "); + re_dub = (double)max_meta; + for (suffix_i = 0; re_dub >= 1024.0; suffix_i++) + re_dub /= 1024.0; + HDassert(suffix_i < sizeof(suffixes)); + HDfprintf(stream, "%8.3lf%c ", re_dub, suffixes[suffix_i]); + + re_dub = (double)max_raw; + for (suffix_i = 0; re_dub >= 1024.0; suffix_i++) + re_dub /= 1024.0; + HDassert(suffix_i < sizeof(suffixes)); + HDfprintf(stream, "%8.3lf%c\n", re_dub, suffixes[suffix_i]); + + /****************************** + * PRINT INDIVIDUAL BIN STATS * + ******************************/ + + HDfprintf(stream, + "BINS # of reads total bytes average size\n"); + HDfprintf(stream, + " up-to meta raw meta raw meta raw\n"); + + for (i = 0; i <= ROS3_STATS_BIN_COUNT; i++) { + const ros3_statsbin *m; + const ros3_statsbin *r; + unsigned long long range_end = 0; + char bm_suffix = ' '; /* bytes-meta */ + double bm_val = 0.0; + char br_suffix = ' '; /* bytes-raw */ + double br_val = 0.0; + char am_suffix = ' '; /* average-meta */ + double am_val = 0.0; + char ar_suffix = ' '; /* average-raw */ + double ar_val = 0.0; + + m = &file->meta[i]; + r = &file->raw[i]; + if (r->count == 0 && m->count == 0) + continue; 
+ + range_end = ros3_stats_boundaries[i]; + + if (i == ROS3_STATS_BIN_COUNT) { + range_end = ros3_stats_boundaries[i-1]; + HDfprintf(stream, ">"); + } else { + HDfprintf(stream, " "); + } + + bm_val = (double)m->bytes; + for (suffix_i = 0; bm_val >= 1024.0; suffix_i++) + bm_val /= 1024.0; + HDassert(suffix_i < sizeof(suffixes)); + bm_suffix = suffixes[suffix_i]; + + br_val = (double)r->bytes; + for (suffix_i = 0; br_val >= 1024.0; suffix_i++) + br_val /= 1024.0; + HDassert(suffix_i < sizeof(suffixes)); + br_suffix = suffixes[suffix_i]; + + if (m->count > 0) + am_val = (double)(m->bytes) / (double)(m->count); + for (suffix_i = 0; am_val >= 1024.0; suffix_i++) + am_val /= 1024.0; + HDassert(suffix_i < sizeof(suffixes)); + am_suffix = suffixes[suffix_i]; + + if (r->count > 0) + ar_val = (double)(r->bytes) / (double)(r->count); + for (suffix_i = 0; ar_val >= 1024.0; suffix_i++) + ar_val /= 1024.0; + HDassert(suffix_i < sizeof(suffixes)); + ar_suffix = suffixes[suffix_i]; + + re_dub = (double)range_end; + for (suffix_i = 0; re_dub >= 1024.0; suffix_i++) + re_dub /= 1024.0; + HDassert(suffix_i < sizeof(suffixes)); + + HDfprintf(stream, + " %8.3f%c %7d %7d %8.3f%c %8.3f%c %8.3f%c %8.3f%c\n", + re_dub, suffixes[suffix_i], /* bin ceiling */ + m->count, /* metadata reads */ + r->count, /* rawdata reads */ + bm_val, bm_suffix, /* metadata bytes */ + br_val, br_suffix, /* rawdata bytes */ + am_val, am_suffix, /* metadata average */ + ar_val, ar_suffix); /* rawdata average */ + + fflush(stream); + } + +done: + FUNC_LEAVE_NOAPI(ret_value); + +} /* ros3_fprint_stats */ +#endif /* ROS3_STATS */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_ros3_close() + * + * Purpose: + * + * Close an HDF5 file. + * + * Return: + * + * SUCCEED/FAIL + * + * Programmer: Jacob Smith + * 2017-11-02 + * + * Changes: None. 
+ * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_ros3_close(H5FD_t *_file) +{ +#ifdef H5_HAVE_ROS3_VFD + H5FD_ros3_t *file = (H5FD_ros3_t *)_file; + herr_t ret_value = SUCCEED; +#else + herr_t ret_value = FAIL; +#endif + + + + FUNC_ENTER_NOAPI_NOINIT +#ifdef H5_HAVE_ROS3_VFD + +#if ROS3_DEBUG + HDfprintf(stdout, "H5FD_ros3_close() called.\n"); +#endif + + /* Sanity checks + */ + HDassert(file != NULL); + HDassert(file->s3r_handle != NULL); + + /* Close the underlying request handle + */ + if (FAIL == H5FD_s3comms_s3r_close(file->s3r_handle)) { + HGOTO_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL, + "unable to close S3 request handle") + } + +#if ROS3_STATS + /* TODO: mechanism to re-target stats printout */ + if (FAIL == ros3_fprint_stats(stdout, file)) { + HGOTO_ERROR(H5E_INTERNAL, H5E_ERROR, FAIL, + "problem while writing file statistics") + } +#endif /* ROS3_STATS */ + + /* Release the file info + */ + file = H5FL_FREE(H5FD_ros3_t, file); + +done: + curl_global_cleanup(); /* cleanup to answer init on open */ +#endif /* H5_HAVE_ROS3_VFD */ + + FUNC_LEAVE_NOAPI(ret_value) + +} /* end H5FD_ros3_close() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_ros3_cmp() + * + * Purpose: + * + * Compares two files belonging to this driver using an arbitrary + * (but consistent) ordering: + * + * + url scheme + * + url host + * + url port + * + url path + * + url query + * + fapl aws_region + * + fapl secret_id + * + fapl secret_key + * + * tl;dr -> check URL, check crentials + * + * Return: + * + * - Equivalent: 0 + * - Not Equivalent: -1 + * + * Programmer: Jacob Smith + * 2017-11-06 + * + * Changes: + * + * + Change from strcmp-like return values (-1, 0, 1) to instead return + * binary equivalence (0) or inequality (-1). + * + Replace "if still equal then check this" waterfall with GOTO jumps. 
+ * Jacob Smith 2018-05-17 + * + *------------------------------------------------------------------------- + */ +static int +H5FD_ros3_cmp(const H5FD_t *_f1, + const H5FD_t *_f2) +{ +#ifdef H5_HAVE_ROS3_VFD + const H5FD_ros3_t *f1 = (const H5FD_ros3_t *)_f1; + const H5FD_ros3_t *f2 = (const H5FD_ros3_t *)_f2; + const parsed_url_t *purl1 = NULL; + const parsed_url_t *purl2 = NULL; +#endif + int ret_value = 0; + + + + FUNC_ENTER_NOAPI_NOINIT_NOERR +#ifdef H5_HAVE_ROS3_VFD + +#if ROS3_DEBUG + HDfprintf(stdout, "H5FD_ros3_cmp() called.\n"); +#endif + + HDassert(f1->s3r_handle != NULL); + HDassert(f2->s3r_handle != NULL); + + purl1 = (const parsed_url_t *)f1->s3r_handle->purl; + purl2 = (const parsed_url_t *)f2->s3r_handle->purl; + HDassert(purl1 != NULL); + HDassert(purl2 != NULL); + HDassert(purl1->scheme != NULL); + HDassert(purl2->scheme != NULL); + HDassert(purl1->host != NULL); + HDassert(purl2->host != NULL); + + /* URL: SCHEME */ + if (strcmp(purl1->scheme, purl2->scheme)) HGOTO_DONE(-1); + + /* URL: HOST */ + if (strcmp(purl1->host, purl2->host)) HGOTO_DONE(-1); + + /* URL: PORT */ + if (purl1->port && purl2->port) { + if (strcmp(purl1->port, purl2->port)) HGOTO_DONE(-1); + } else if (purl1->port) { + HGOTO_DONE(-1); + } else if (purl2->port) { + HGOTO_DONE(-1); + } + + /* URL: PATH */ + if (purl1->path && purl2->path) { + if (strcmp(purl1->path, purl2->path)) HGOTO_DONE(-1); + } else if (purl1->path && !purl2->path) { + HGOTO_DONE(-1); + } else if (purl2->path && !purl1->path) { + HGOTO_DONE(-1); + } + + /* URL: QUERY */ + if (purl1->query && purl2->query) { + if (strcmp(purl1->query, purl2->query)) HGOTO_DONE(-1); + } else if (purl1->query && !purl2->query) { + HGOTO_DONE(-1); + } else if (purl2->query && !purl1->query) { + HGOTO_DONE(-1); + } + + /* FAPL: AWS_REGION */ + if (f1->fa.aws_region[0] != '\0' && f1->fa.aws_region[0] != '\0') { + if (strcmp(f1->fa.aws_region, f2->fa.aws_region)) HGOTO_DONE(-1); + } else if (f1->fa.aws_region[0] != '\0') { + 
HGOTO_DONE(-1); + } else if (f2->fa.aws_region[0] != '\0') { + HGOTO_DONE(-1); + } + + /* FAPL: SECRET_ID */ + if (f1->fa.secret_id[0] != '\0' && f1->fa.secret_id[0] != '\0') { + if (strcmp(f1->fa.secret_id, f2->fa.secret_id)) HGOTO_DONE(-1); + } else if (f1->fa.secret_id[0] != '\0') { + HGOTO_DONE(-1); + } else if (f2->fa.secret_id[0] != '\0') { + HGOTO_DONE(-1); + } + + /* FAPL: SECRET_KEY */ + if (f1->fa.secret_key[0] != '\0' && f1->fa.secret_key[0] != '\0') { + if (strcmp(f1->fa.secret_key, f2->fa.secret_key)) HGOTO_DONE(-1); + } else if (f1->fa.secret_key[0] != '\0') { + HGOTO_DONE(-1); + } else if (f2->fa.secret_key[0] != '\0') { + HGOTO_DONE(-1); + } +#endif /* H5_HAVE_ROS3_VFD */ + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_ros3_cmp() */ + + +/*------------------------------------------------------------------------- + * Function: H5FD_ros3_query + * + * Purpose: Set the flags that this VFL driver is capable of supporting. + * (listed in H5FDpublic.h) + * + * Note that since the ROS3 VFD is read only, most flags + * are irrelevant. + * + * The term "set" is highly misleading... + * stores/copies the supported flags in the out-pointer `flags`. 
+ * + * Return: SUCCEED (Can't fail) + * + * Programmer: John Mainzer + * 9/11/17 + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_ros3_query(const H5FD_t H5_ATTR_UNUSED *_file, + unsigned long *flags /* out */) +{ + FUNC_ENTER_NOAPI_NOINIT_NOERR + +#if ROS3_DEBUG + HDfprintf(stdout, "H5FD_ros3_query() called.\n"); +#endif + + /* Set the VFL feature flags that this driver supports */ + if (flags) { + *flags = 0; + /* OK to perform data sieving for faster raw data reads & writes */ + *flags |= H5FD_FEAT_DATA_SIEVE; + } /* end if */ + + FUNC_LEAVE_NOAPI(SUCCEED) + +} /* H5FD_ros3_query() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_ros3_get_eoa() + * + * Purpose: + * + * Gets the end-of-address marker for the file. The EOA marker + * is the first address past the last byte allocated in the + * format address space. + * + * Return: + * + * The end-of-address marker. + * + * Programmer: Jacob Smith + * 2017-11-02 + * + * Changes: None. + * + *------------------------------------------------------------------------- + */ +static haddr_t +H5FD_ros3_get_eoa(const H5FD_t *_file, + H5FD_mem_t H5_ATTR_UNUSED type) +{ + const H5FD_ros3_t *file = (const H5FD_ros3_t *)_file; + + FUNC_ENTER_NOAPI_NOINIT_NOERR + +#if ROS3_DEBUG + HDfprintf(stdout, "H5FD_ros3_get_eoa() called.\n"); +#endif + + FUNC_LEAVE_NOAPI(file->eoa) + +} /* end H5FD_ros3_get_eoa() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_ros3_set_eoa() + * + * Purpose: + * + * Set the end-of-address marker for the file. + * + * Return: + * + * SUCCEED (can't fail) + * + * Programmer: Jacob Smith + * 2017-11-03 + * + * Changes: None. 
+ * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_ros3_set_eoa(H5FD_t *_file, + H5FD_mem_t H5_ATTR_UNUSED type, + haddr_t addr) +{ + H5FD_ros3_t *file = (H5FD_ros3_t *)_file; + + FUNC_ENTER_NOAPI_NOINIT_NOERR + +#if ROS3_DEBUG + HDfprintf(stdout, "H5FD_ros3_set_eoa() called.\n"); +#endif + + file->eoa = addr; + + FUNC_LEAVE_NOAPI(SUCCEED) + +} /* H5FD_ros3_set_eoa() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_ros3_get_eof() + * + * Purpose: + * + * Returns the end-of-file marker. + * + * Return: + * + * EOF: the first address past the end of the "file", either the + * filesystem file or the HDF5 file. + * + * Programmer: Jacob Smith + * 2017-11-02 + * + *------------------------------------------------------------------------- + */ +static haddr_t +H5FD_ros3_get_eof(const H5FD_t *_file, + H5FD_mem_t H5_ATTR_UNUSED type) +{ + const H5FD_ros3_t *file = (const H5FD_ros3_t *)_file; + + FUNC_ENTER_NOAPI_NOINIT_NOERR + +#if ROS3_DEBUG + HDfprintf(stdout, "H5FD_ros3_get_eof() called.\n"); +#endif + + FUNC_LEAVE_NOAPI(H5FD_s3comms_s3r_get_filesize(file->s3r_handle)) + +} /* end H5FD_ros3_get_eof() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_ros3_get_handle() + * + * Purpose: + * + * Returns the S3 Request handle (s3r_t) of ros3 file driver. + * + * Returns: + * + * SUCCEED/FAIL + * + * Programmer: Jacob Smith + * 2017-11-02 + * + * Changes: None. 
+ * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_ros3_get_handle(H5FD_t *_file, + hid_t H5_ATTR_UNUSED fapl, + void **file_handle) +{ + H5FD_ros3_t *file = (H5FD_ros3_t *)_file; + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + +#if ROS3_DEBUG + HDfprintf(stdout, "H5FD_ros3_get_handle() called.\n"); +#endif + + if(!file_handle) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "file handle not valid") + + *file_handle = file->s3r_handle; + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* end H5FD_ros3_get_handle() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_ros3_read() + * + * Purpose: + * + * Reads SIZE bytes of data from FILE beginning at address ADDR + * into buffer BUF according to data transfer properties in DXPL_ID. + * + * Return: + * + * Success: `SUCCEED` + * - Result is stored in caller-supplied buffer BUF. + * Failure: `FAIL` + * - Unable to complete read. + * - Contents of buffer `buf` are undefined. + * + * Programmer: Jacob Smith + * 2017-11-?? + * + * Changes: None. 
+ * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_ros3_read(H5FD_t *_file, + H5FD_mem_t H5_ATTR_UNUSED type, + hid_t H5_ATTR_UNUSED dxpl_id, + haddr_t addr, /* start offset */ + size_t size, /* length of read */ + void *buf) /* out */ +{ + H5FD_ros3_t *file = (H5FD_ros3_t *)_file; + size_t filesize = 0; + herr_t ret_value = SUCCEED; /* Return value */ +#if ROS3_STATS + /* working variables for storing stats */ + ros3_statsbin *bin = NULL; + unsigned bin_i = 0; +#endif /* ROS3_STATS */ + + + FUNC_ENTER_NOAPI_NOINIT + +#if ROS3_DEBUG + HDfprintf(stdout, "H5FD_ros3_read() called.\n"); +#endif + + HDassert(file != NULL); + HDassert(file->s3r_handle != NULL); + HDassert(buf != NULL); + + filesize = H5FD_s3comms_s3r_get_filesize(file->s3r_handle); + + if ((addr > filesize) || ((addr + size) > filesize)) { + HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL, "range exceeds file address") + } + + if (FAIL == H5FD_s3comms_s3r_read(file->s3r_handle, addr, size, buf) ) { + HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, "unable to execute read") + } + +#if ROS3_STATS + + /* Find which "bin" this read fits in. Can be "overflow" bin. + */ + for (bin_i = 0; bin_i < ROS3_STATS_BIN_COUNT; bin_i++) { + if ((unsigned long long)size < ros3_stats_boundaries[bin_i]) + break; + } + bin = (type == H5FD_MEM_DRAW) + ? &file->raw[bin_i] + : &file->meta[bin_i]; + + /* Store collected stats in appropriate bin + */ + if (bin->count == 0) { + bin->min = size; + bin->max = size; + } else { + if (size < bin->min) + bin->min = size; + if (size > bin->max) + bin->max = size; + } + bin->count++; + bin->bytes += (unsigned long long)size; + +#endif /* ROS3_STATS */ + + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* end H5FD_ros3_read() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_ros3_write() + * + * Purpose: + * + * Write bytes to file. + * UNSUPPORTED IN READ-ONLY ROS3 VFD. 
+ * + * Return: + * + * FAIL (Not possible with Read-Only S3 file.) + * + * Programmer: Jacob Smith + * 2017-10-23 + * + * Changes: None. + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_ros3_write(H5FD_t H5_ATTR_UNUSED *_file, + H5FD_mem_t H5_ATTR_UNUSED type, + hid_t H5_ATTR_UNUSED dxpl_id, + haddr_t H5_ATTR_UNUSED addr, + size_t H5_ATTR_UNUSED size, + const void H5_ATTR_UNUSED *buf) +{ + herr_t ret_value = FAIL; + + FUNC_ENTER_NOAPI_NOINIT + +#if ROS3_DEBUG + HDfprintf(stdout, "H5FD_ros3_write() called.\n"); +#endif + + HGOTO_ERROR(H5E_VFL, H5E_UNSUPPORTED, FAIL, + "cannot write to read-only file.") + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_ros3_write() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_ros3_truncate() + * + * Purpose: + * + * Makes sure that the true file size is the same (or larger) + * than the end-of-address. + * + * NOT POSSIBLE ON READ-ONLY S3 FILES. + * + * Return: + * + * FAIL (Not possible on Read-Only S3 files.) + * + * Programmer: Jacob Smith + * 2017-10-23 + * + * Changes: None. + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_ros3_truncate(H5FD_t H5_ATTR_UNUSED *_file, + hid_t H5_ATTR_UNUSED dxpl_id, + hbool_t H5_ATTR_UNUSED closing) +{ + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + +#if ROS3_DEBUG + HDfprintf(stdout, "H5FD_ros3_truncate() called.\n"); +#endif + + HGOTO_ERROR(H5E_VFL, H5E_UNSUPPORTED, FAIL, + "cannot truncate read-only file.") + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* end H5FD_ros3_truncate() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_ros3_lock() + * + * Purpose: + * + * Place an advisory lock on a file. + * No effect on Read-Only S3 file. 
+ * + * Suggestion: remove lock/unlock from class + * > would result in error at H5FD_[un]lock() (H5FD.c) + * + * Return: + * + * SUCCEED (No-op always succeeds) + * + * Programmer: Jacob Smith + * 2017-11-03 + * + * Changes: None. + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_ros3_lock(H5FD_t H5_ATTR_UNUSED *_file, + hbool_t H5_ATTR_UNUSED rw) +{ + FUNC_ENTER_NOAPI_NOINIT_NOERR + FUNC_LEAVE_NOAPI(SUCCEED) + +} /* end H5FD_ros3_lock() */ + + +/*------------------------------------------------------------------------- + * + * Function: H5FD_ros3_unlock() + * + * Purpose: + * + * Remove the existing lock on the file. + * No effect on Read-Only S3 file. + * + * Return: + * + * SUCCEED (No-op always succeeds) + * + * Programmer: Jacob Smith + * 2017-11-03 + * + * Changes: None. + * + *------------------------------------------------------------------------- + */ +static herr_t +H5FD_ros3_unlock(H5FD_t H5_ATTR_UNUSED *_file) +{ + FUNC_ENTER_NOAPI_NOINIT_NOERR + FUNC_LEAVE_NOAPI(SUCCEED) + +} /* end H5FD_ros3_unlock() */ + + diff --git a/src/H5FDros3.h b/src/H5FDros3.h new file mode 100644 index 0000000..49e757c --- /dev/null +++ b/src/H5FDros3.h @@ -0,0 +1,105 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Read-Only S3 Virtual File Driver (VFD) * + * Copyright (c) 2017-2018, The HDF Group. * + * * + * All rights reserved. * + * * + * NOTICE: * + * All information contained herein is, and remains, the property of The HDF * + * Group. The intellectual and technical concepts contained herein are * + * proprietary to The HDF Group. Dissemination of this information or * + * reproduction of this material is strictly forbidden unless prior written * + * permission is obtained from The HDF Group. 
* + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* + * Programmer: John Mainzer + * 2017-10-10 + * + * Purpose: The public header file for the ros3 driver. + */ +#ifndef H5FDros3_H +#define H5FDros3_H + +#define H5FD_ROS3 (H5FD_ros3_init()) + +#ifdef __cplusplus +extern "C" { +#endif + +/**************************************************************************** + * + * Structure: H5FD_ros3_fapl_t + * + * Purpose: + * + * H5FD_ros3_fapl_t is a public structure that is used to pass S3 + * authentication data to the appropriate S3 VFD via the FAPL. A pointer + * to an instance of this structure is a parameter to H5Pset_fapl_ros3() + * and H5Pget_fapl_ros3(). + * + * + * + * `version` (int32_t) + * + * Version number of the H5FD_ros3_fapl_t structure. Any instance passed + * to the above calls must have a recognized version number, or an error + * will be flagged. + * + * This field should be set to H5FD__CURR_ROS3_FAPL_T_VERSION. + * + * `authenticate` (hbool_t) + * + * Flag TRUE or FALSE whether or not requests are to be authenticated + * with the AWS4 algorithm. + * If TRUE, `aws_region`, `secret_id`, and `secret_key` must be populated. + * If FALSE, those three components are unused. + * + * `aws_region` (char[]) + * + * String: name of the AWS "region" of the host, e.g. "us-east-1". + * + * `secret_id` (char[]) + * + * String: "Access ID" for the resource. + * + * `secret_key` (char[]) + * + * String: "Secret Access Key" associated with the ID and resource. 
+ * + * + * + * Programmer: John Mainzer + * + * Changes: + * + * - Add documentation of fields (except `version`) + * --- Jacob Smith 2017-12-04 + * + ****************************************************************************/ + +#define H5FD__CURR_ROS3_FAPL_T_VERSION 1 + +#define H5FD__ROS3_MAX_REGION_LEN 32 +#define H5FD__ROS3_MAX_SECRET_ID_LEN 128 +#define H5FD__ROS3_MAX_SECRET_KEY_LEN 128 + +typedef struct H5FD_ros3_fapl_t { + int32_t version; + hbool_t authenticate; + char aws_region[H5FD__ROS3_MAX_REGION_LEN + 1]; + char secret_id[H5FD__ROS3_MAX_SECRET_ID_LEN + 1]; + char secret_key[H5FD__ROS3_MAX_SECRET_KEY_LEN + 1]; +} H5FD_ros3_fapl_t; + +H5_DLL hid_t H5FD_ros3_init(void); +H5_DLL herr_t H5Pget_fapl_ros3(hid_t fapl_id, H5FD_ros3_fapl_t * fa_out); +H5_DLL herr_t H5Pset_fapl_ros3(hid_t fapl_id, H5FD_ros3_fapl_t * fa); + +#ifdef __cplusplus +} +#endif + +#endif /* ifndef H5FDros3_H */ + + diff --git a/src/H5FDs3comms.c b/src/H5FDs3comms.c new file mode 100644 index 0000000..7caeacb --- /dev/null +++ b/src/H5FDs3comms.c @@ -0,0 +1,3770 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Read-Only S3 Virtual File Driver (VFD) * + * Copyright (c) 2017-2018, The HDF Group. * + * * + * All rights reserved. * + * * + * NOTICE: * + * All information contained herein is, and remains, the property of The HDF * + * Group. The intellectual and technical concepts contained herein are * + * proprietary to The HDF Group. Dissemination of this information or * + * reproduction of this material is strictly forbidden unless prior written * + * permission is obtained from The HDF Group. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/***************************************************************************** + * Source for S3 Communications module + * + * ***NOT A FILE DRIVER*** + * + * Provide functions and structures required for interfacing with Amazon + * Simple Storage Service (S3). 
+ * + * Provide S3 object access as if it were a local file. + * + * Connect to remote host, send and receive HTTP requests and responses + * as part of the AWS REST API, authenticating requests as appropriate. + * + * Programmer: Jacob Smith + * 2017-11-30 + * + *****************************************************************************/ + +/****************/ +/* Module Setup */ +/****************/ + +/***********/ +/* Headers */ +/***********/ + +#include "H5private.h" /* generic functions */ +#include "H5Eprivate.h" /* error handling */ +#include "H5MMprivate.h" /* memory management */ +#include "H5FDs3comms.h" /* S3 Communications */ + +/****************/ +/* Local Macros */ +/****************/ + +/* toggle debugging (enable with 1) + */ +#define S3COMMS_DEBUG 0 + +/* manipulate verbosity of CURL output + * operates separately from S3COMMS_DEBUG + * + * 0 -> no explicit curl output + * 1 -> on error, print failure info to stderr + * 2 -> in addition to above, print information for all performs; sets all + * curl handles with CURLOPT_VERBOSE + */ +#define S3COMMS_CURL_VERBOSITY 0 + +/* size to allocate for "bytes=<first_byte>[-<last_byte>]" HTTP Range value + */ +#define S3COMMS_MAX_RANGE_STRING_SIZE 128 + +/******************/ +/* Local Typedefs */ +/******************/ + +/********************/ +/* Local Structures */ +/********************/ + +/* struct s3r_datastruct + * Structure passed to curl write callback + * pointer to data region and record of bytes written (offset) + */ +struct s3r_datastruct { + unsigned long magic; + char *data; + size_t size; +}; +#define S3COMMS_CALLBACK_DATASTRUCT_MAGIC 0x28c2b2ul + +/********************/ +/* Local Prototypes */ +/********************/ + +size_t curlwritecallback(char *ptr, + size_t size, + size_t nmemb, + void *userdata); + +herr_t H5FD_s3comms_s3r_getsize(s3r_t *handle); + +/*********************/ +/* Package Variables */ +/*********************/ + +/*****************************/ +/* Library Private 
Variables */
/*****************************/

/*******************/
/* Local Variables */
/*******************/

/*************/
/* Functions */
/*************/


/*----------------------------------------------------------------------------
 *
 * Function: curlwritecallback()
 *
 * Purpose:
 *
 *     Function called by CURL to write received data.
 *
 *     Appends the received bytes to the `s3r_datastruct` given as
 *     `userdata`, advancing its running byte count.
 *
 * Return:
 *
 *     - Number of bytes processed.
 *         - Should equal number of bytes passed to callback.
 *         - Failure will result in curl error: CURLE_WRITE_ERROR.
 *
 * Programmer: Jacob Smith
 *             2017-08-17
 *
 *----------------------------------------------------------------------------
 */
size_t
curlwritecallback(char *ptr,
    size_t size,
    size_t nmemb,
    void *userdata)
{
    struct s3r_datastruct *dest   = (struct s3r_datastruct *)userdata;
    size_t                 nbytes = size * nmemb;

    /* Refuse to write through a structure that fails its magic check;
     * returning a count != nbytes makes curl raise CURLE_WRITE_ERROR.
     */
    if (dest->magic != S3COMMS_CALLBACK_DATASTRUCT_MAGIC)
        return 0;

    if (size == 0)
        return 0;

    /* Append payload at the current offset and advance the running total.
     * NOTE(review): no capacity check here -- presumably the caller sizes
     * `dest->data` to the full expected response; confirm at call sites.
     */
    HDmemcpy(&(dest->data[dest->size]), ptr, nbytes);
    dest->size += nbytes;

    return nbytes;

} /* curlwritecallback */


/*----------------------------------------------------------------------------
 *
 * Function: H5FD_s3comms_hrb_node_set()
 *
 * Purpose:
 *
 *     Create, insert, modify, and remove elements in a field node list.
 *
 *     `name` cannot be null; will return FAIL and list will be unaltered.
 *
 *     Entries are accessed via the lowercase representation of their name:
 *     "Host", "host", and "hOSt" would all access the same node,
 *     but name's case is relevant in HTTP request output.
+ * + * List pointer `L` must always point to either of : + * - header node with lowest alphabetical order (by lowername) + * - NULL, if list is empty + * + * Types of operations: + * + * - CREATE + * - If `L` is NULL and `name` and `value` are not NULL, + * a new node is created at `L`, starting a list. + * - MODIFY + * - If a node is found with a matching lowercase name and `value` + * is not NULL, the existing name, value, and cat values are released + * and replaced with the new data. + * - No modifications are made to the list pointers. + * - REMOVE + * - If `value` is NULL, will attempt to remove node with matching + * lowercase name. + * - If no match found, returns FAIL and list is not modified. + * - When removing a node, all its resources is released. + * - If removing the last node in the list, list pointer is set to NULL. + * - INSERT + * - If no nodes exists with matching lowercase name and `value` + * is not NULL, a new node is created, inserted into list + * alphabetically by lowercase name. + * + * Return: + * + * - SUCCESS: `SUCCEED` + * - List was successfully modified + * - FAILURE: `FAIL` + * - Unable to perform operation + * - Forbidden (attempting to remove absent node, e.g.) 
+ * - Internal error + * + * Programmer: Jacob Smith + * 2017-09-22 + * + * Changes: + * + * - Change return value to herr_t + * - Change list pointer to pointer-to-pointer-to-node + * - Change to use singly-linked list (from twin doubly-linked lists) + * with modification to hrb_node_t + * --- Jake Smith 2017-01-17 + * + *---------------------------------------------------------------------------- + */ +herr_t +H5FD_s3comms_hrb_node_set(hrb_node_t **L, + const char *name, + const char *value) +{ + size_t i = 0; + char *valuecpy = NULL; + char *namecpy = NULL; + size_t namelen = 0; + char *lowername = NULL; + char *nvcat = NULL; + hrb_node_t *node_ptr = NULL; + hrb_node_t *new_node = NULL; + hbool_t is_looking = TRUE; + herr_t ret_value = SUCCEED; + + + + FUNC_ENTER_NOAPI_NOINIT + +#if S3COMMS_DEBUG + HDfprintf(stdout, "called H5FD_s3comms_hrb_node_set.\n"); + HDprintf("NAME: %s\n", name); + HDprintf("VALUE: %s\n", value); + HDprintf("LIST:\n->"); + for (node_ptr = (*L); node_ptr != NULL; node_ptr = node_ptr->next) + HDfprintf(stdout, "{%s}\n->", node_ptr->cat); + HDprintf("(null)\n"); + fflush(stdout); + node_ptr = NULL; +#endif + + if (name == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "unable to operate on null name.\n"); + } + namelen = HDstrlen(name); + + /*********************** + * PREPARE ALL STRINGS * + **********************/ + + /* copy and lowercase name + */ + lowername = (char *)H5MM_malloc(sizeof(char) * (namelen + 1)); + if (lowername == NULL) { + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, + "cannot make space for lowercase name copy.\n"); + } + for (i = 0; i < namelen; i++) { + lowername[i] = (char)tolower((int)name[i]); + } + lowername[namelen] = 0; + + /* If value supplied, copy name, value, and concatenated "name: value". 
+ * If NULL, we will be removing a node or doing nothing, so no need for + * copies + */ + if (value != NULL) { + size_t valuelen = HDstrlen(value); + size_t catlen = namelen + valuelen + 2; /* HDstrlen(": ") -> +2 */ + int sprint_ret = 0; + + namecpy = (char *)H5MM_malloc(sizeof(char) * (namelen + 1)); + if (namecpy == NULL) { + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, + "cannot make space for name copy.\n"); + } + HDmemcpy(namecpy, name, namelen + 1); + + valuecpy = (char *)H5MM_malloc(sizeof(char) * (valuelen + 1)); + if (valuecpy == NULL) { + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, + "cannot make space for value copy.\n"); + } + HDmemcpy(valuecpy, value, valuelen + 1); + + nvcat = (char *)H5MM_malloc(sizeof(char) * (catlen + 1)); + if (nvcat == NULL) { + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, + "cannot make space for concatenated string.\n"); + } + sprint_ret = HDsnprintf(nvcat, (catlen + 1), "%s: %s", name, value); + if (sprint_ret <= 0) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "error while concatenating `%s: %s", name, value); + HDassert( catlen == (size_t)sprint_ret ); + + /* create new_node, should we need it + */ + new_node = (hrb_node_t *)H5MM_malloc(sizeof(hrb_node_t)); + if (new_node == NULL) { + HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL, + "cannot make space for new set.\n"); + } + + new_node->magic = S3COMMS_HRB_NODE_MAGIC; + new_node->name = NULL; + new_node->value = NULL; + new_node->cat = NULL; + new_node->lowername = NULL; + new_node->next = NULL; + } + + /*************** + * ACT ON LIST * + ***************/ + + if (*L == NULL) { + if (value == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "trying to remove node from empty list"); + } else { +#if S3COMMS_DEBUG +HDprintf("CREATE NEW\n"); fflush(stdout); +#endif + /******************* + * CREATE NEW LIST * + *******************/ + + new_node->cat = nvcat; + new_node->name = namecpy; + new_node->lowername = lowername; + new_node->value = valuecpy; + + *L = new_node; + 
goto done; /* bypass further seeking */ + } + } + + /* sanity-check pointer passed in + */ + HDassert( (*L) != NULL ); + HDassert( (*L)->magic == S3COMMS_HRB_NODE_MAGIC ); + node_ptr = (*L); + + /* Check whether to modify/remove first node in list + */ + if (strcmp(lowername, node_ptr->lowername) == 0) { + + is_looking = FALSE; + + if (value == NULL) { +#if S3COMMS_DEBUG +HDprintf("REMOVE HEAD\n"); fflush(stdout); +#endif + /*************** + * REMOVE HEAD * + ***************/ + + *L = node_ptr->next; + +#if S3COMMS_DEBUG +HDprintf("FREEING CAT (node)\n"); fflush(stdout); +#endif + H5MM_xfree(node_ptr->cat); +#if S3COMMS_DEBUG +HDprintf("FREEING LOWERNAME (node)\n"); fflush(stdout); +#endif + H5MM_xfree(node_ptr->lowername); +#if S3COMMS_DEBUG +HDprintf("FREEING NAME (node)\n"); fflush(stdout); +#endif + H5MM_xfree(node_ptr->name); +#if S3COMMS_DEBUG +HDprintf("FREEING VALUE (node)\n"); fflush(stdout); +#endif + H5MM_xfree(node_ptr->value); +#if S3COMMS_DEBUG +HDprintf("MAGIC OK? %s\n", + (node_ptr->magic == S3COMMS_HRB_NODE_MAGIC) ? 
"YES" : "NO"); +fflush(stdout); +#endif + HDassert( node_ptr->magic == S3COMMS_HRB_NODE_MAGIC ); + node_ptr->magic += 1ul; +#if S3COMMS_DEBUG +HDprintf("FREEING POINTER\n"); fflush(stdout); +#endif + H5MM_xfree(node_ptr); + +#if S3COMMS_DEBUG +HDprintf("FREEING WORKING LOWERNAME\n"); fflush(stdout); +#endif + H5MM_xfree(lowername); lowername = NULL; + } else { +#if S3COMMS_DEBUG +HDprintf("MODIFY HEAD\n"); fflush(stdout); +#endif + /*************** + * MODIFY HEAD * + ***************/ + + H5MM_xfree(node_ptr->cat); + H5MM_xfree(node_ptr->name); + H5MM_xfree(node_ptr->value); + + node_ptr->name = namecpy; + node_ptr->value = valuecpy; + node_ptr->cat = nvcat; + + H5MM_xfree(lowername); + lowername = NULL; + new_node->magic += 1ul; + H5MM_xfree(new_node); + new_node = NULL; + } + } else if (strcmp(lowername, node_ptr->lowername) < 0) { + + is_looking = FALSE; + + if (value == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "trying to remove a node 'before' head"); + } else { +#if S3COMMS_DEBUG +HDprintf("PREPEND NEW HEAD\n"); fflush(stdout); +#endif + /******************* + * INSERT NEW HEAD * + *******************/ + + new_node->name = namecpy; + new_node->value = valuecpy; + new_node->lowername = lowername; + new_node->cat = nvcat; + new_node->next = node_ptr; + *L = new_node; + } + } + + /*************** + * SEARCH LIST * + ***************/ + + while (is_looking) { + if (node_ptr->next == NULL) { + + is_looking = FALSE; + + if (value == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "trying to remove absent node"); + } else { +#if S3COMMS_DEBUG +HDprintf("APPEND A NODE\n"); fflush(stdout); +#endif + /******************* + * APPEND NEW NODE * + *******************/ + + HDassert( strcmp(lowername, node_ptr->lowername) > 0 ); + new_node->name = namecpy; + new_node->value = valuecpy; + new_node->lowername = lowername; + new_node->cat = nvcat; + node_ptr->next = new_node; + } + } else if (strcmp(lowername, node_ptr->next->lowername) < 0) { + + is_looking = 
FALSE; + + if (value == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "trying to remove absent node"); + } else { +#if S3COMMS_DEBUG +HDprintf("INSERT A NODE\n"); fflush(stdout); +#endif + /******************* + * INSERT NEW NODE * + *******************/ + + HDassert( strcmp(lowername, node_ptr->lowername) > 0 ); + new_node->name = namecpy; + new_node->value = valuecpy; + new_node->lowername = lowername; + new_node->cat = nvcat; + new_node->next = node_ptr->next; + node_ptr->next = new_node; + } + } else if (strcmp(lowername, node_ptr->next->lowername) == 0) { + + is_looking = FALSE; + + if (value == NULL) { + /***************** + * REMOVE A NODE * + *****************/ + + hrb_node_t *tmp = node_ptr->next; + node_ptr->next = tmp->next; + +#if S3COMMS_DEBUG +HDprintf("REMOVE A NODE\n"); fflush(stdout); +#endif + H5MM_xfree(tmp->cat); + H5MM_xfree(tmp->lowername); + H5MM_xfree(tmp->name); + H5MM_xfree(tmp->value); + + HDassert( tmp->magic == S3COMMS_HRB_NODE_MAGIC ); + tmp->magic += 1ul; + H5MM_xfree(tmp); + + H5MM_xfree(lowername); + lowername = NULL; + } else { +#if S3COMMS_DEBUG +HDprintf("MODIFY A NODE\n"); fflush(stdout); +#endif + /***************** + * MODIFY A NODE * + *****************/ + + node_ptr = node_ptr->next; + H5MM_xfree(node_ptr->name); + H5MM_xfree(node_ptr->value); + H5MM_xfree(node_ptr->cat); + + HDassert( new_node->magic == S3COMMS_HRB_NODE_MAGIC ); + new_node->magic += 1ul; + H5MM_xfree(new_node); + H5MM_xfree(lowername); + new_node = NULL; + lowername = NULL; + + node_ptr->name = namecpy; + node_ptr->value = valuecpy; + node_ptr->cat = nvcat; + } + } else { + /**************** + * KEEP LOOKING * + ****************/ + + node_ptr = node_ptr->next; + } + } + +done: + if (ret_value == FAIL) { + /* clean up + */ + if (nvcat != NULL) H5MM_xfree(nvcat); + if (namecpy != NULL) H5MM_xfree(namecpy); + if (lowername != NULL) H5MM_xfree(lowername); + if (valuecpy != NULL) H5MM_xfree(valuecpy); + if (new_node != NULL) { + HDassert( new_node->magic 
== S3COMMS_HRB_NODE_MAGIC ); + new_node->magic += 1ul; + H5MM_xfree(new_node); + } + } + + FUNC_LEAVE_NOAPI(ret_value); + +} /* H5FD_s3comms_hrb_node_set */ + + + +/*---------------------------------------------------------------------------- + * + * Function: H5FD_s3comms_hrb_destroy() + * + * Purpose: + * + * Destroy and free resources _directly_ associated with an HTTP Buffer. + * + * Takes a pointer to pointer to the buffer structure. + * This allows for the pointer itself to be NULLed from within the call. + * + * If buffer or buffer pointer is NULL, there is no effect. + * + * Headers list at `first_header` is not touched. + * + * - Programmer should re-use or destroy `first_header` pointer + * (hrb_node_t *) as suits their purposes. + * - Recommend fetching prior to destroy() + * e.g., `reuse_node = hrb_to_die->first_header; destroy(hrb_to_die);` + * or maintaining an external reference. + * - Destroy node/list separately as appropriate + * - Failure to account for this will result in a memory leak. + * + * Return: + * + * - SUCCESS: `SUCCEED` + * - successfully released buffer resources + * - if `buf` is NULL or `*buf` is NULL, no effect + * - FAILURE: `FAIL` + * - `buf->magic != S3COMMS_HRB_MAGIC` + * + * Programmer: Jacob Smith + * 2017-07-21 + * + * Changes: + * + * - Conditional free() of `hrb_node_t` pointer properties based on + * `which_free` property. + * --- Jacob Smith 2017-08-08 + * + * - Integrate with HDF5. + * - Returns herr_t instead of nothing. 
+ * --- Jacob Smith 2017-09-21 + * + * - Change argument to from *buf to **buf, to null pointer within call + * --- Jacob Smith 2017-20-05 + * + *---------------------------------------------------------------------------- + */ +herr_t +H5FD_s3comms_hrb_destroy(hrb_t **_buf) +{ + hrb_t *buf = NULL; + herr_t ret_value = SUCCEED; + + + + FUNC_ENTER_NOAPI_NOINIT + +#if S3COMMS_DEBUG + HDfprintf(stdout, "called H5FD_s3comms_hrb_destroy.\n"); +#endif + + if (_buf != NULL && *_buf != NULL) { + buf = *_buf; + if (buf->magic != S3COMMS_HRB_MAGIC) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "pointer's magic does not match.\n"); + } + + H5MM_xfree(buf->verb); + H5MM_xfree(buf->version); + H5MM_xfree(buf->resource); + buf->magic += 1ul; + H5MM_xfree(buf); + *_buf = NULL; + } + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_s3comms_hrb_destroy */ + + +/*---------------------------------------------------------------------------- + * + * Function: H5FD_s3comms_hrb_init_request() + * + * Purpose: + * + * Create a new HTTP Request Buffer + * + * All non-null arguments should be null-terminated strings. + * + * If `verb` is NULL, defaults to "GET". + * If `http_version` is NULL, defaults to "HTTP/1.1". + * + * `resource` cannot be NULL; should be string beginning with slash + * character ('/'). + * + * All strings are copied into the structure, making them safe from + * modification in source strings. + * + * Return: + * + * - SUCCESS: pointer to new `hrb_t` + * - FAILURE: `NULL` + * + * Programmer: Jacob Smith + * 2017-07-21 + * + * Changes: + * + * - Update struct membership for newer 'generic' `hrb_t` format. + * --- Jacob Smith, 2017-07-24 + * + * - Rename from `hrb_new()` to `hrb_request()` + * --- Jacob Smith, 2017-07-25 + * + * - Integrate with HDF5. + * - Rename from 'hrb_request()` to `H5FD_s3comms_hrb_init_request()`. + * - Remove `host` from input parameters. + * - Host, as with all other fields, must now be added through the + * add-field functions. 
+ * - Add `version` (HTTP version string, e.g. "HTTP/1.1") to parameters. + * --- Jacob Smith 2017-09-20 + * + * - Update to use linked-list `hrb_node_t` headers in structure. + * --- Jacob Smith 2017-10-05 + * + *---------------------------------------------------------------------------- + */ +hrb_t * +H5FD_s3comms_hrb_init_request(const char *_verb, + const char *_resource, + const char *_http_version) +{ + hrb_t *request = NULL; + char *res = NULL; + size_t reslen = 0; + hrb_t *ret_value = NULL; + char *verb = NULL; + size_t verblen = 0; + char *vrsn = NULL; + size_t vrsnlen = 0; + + + + FUNC_ENTER_NOAPI_NOINIT + +#if S3COMMS_DEBUG + HDfprintf(stdout, "called H5FD_s3comms_hrb_init_request.\n"); +#endif + + if (_resource == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "resource string cannot be null.\n"); + } + + /* populate valid NULLs with defaults + */ + if (_verb == NULL) + _verb = "GET"; + + if (_http_version == NULL) + _http_version = "HTTP/1.1"; + + /* malloc space for and prepare structure + */ + request = (hrb_t *)H5MM_malloc(sizeof(hrb_t)); + if (request == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, NULL, + "no space for request structure"); + } + request->magic = S3COMMS_HRB_MAGIC; + request->body = NULL; + request->body_len = 0; + request->first_header = NULL; + + + + /* malloc and copy strings for the structure + */ + if (_resource[0] == '/') { + reslen = HDstrlen(_resource) + 1; + res = (char *)H5MM_malloc(sizeof(char) * reslen); + if (res == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, NULL, + "no space for resource string"); + } + HDstrncpy(res, _resource, reslen); + } else { + int sprint_ret = 0; + reslen = HDstrlen(_resource) + 2; + res = (char *)H5MM_malloc(sizeof(char) * reslen); + if (res == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, NULL, + "no space for resource string"); + } + sprint_ret = HDsnprintf(res, reslen, "/%s", _resource); + if (sprint_ret <= 0) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "errro while 
appending resource string %s", _resource); + HDassert( (reslen - 1) == (size_t)sprint_ret ); + } /* start resource string with '/' */ + + verblen = HDstrlen(_verb) + 1; + verb = (char *)H5MM_malloc(sizeof(char) * verblen); + if (verb == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "no space for verb string"); + } + HDstrncpy(verb, _verb, verblen); + + vrsnlen = HDstrlen(_http_version) + 1; + vrsn = (char *)H5MM_malloc(sizeof(char) * vrsnlen); + if (vrsn == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "no space for http-version string"); + } + HDstrncpy(vrsn, _http_version, vrsnlen); + + + + /* place new copies into structure + */ + request->resource = res; + request->verb = verb; + request->version = vrsn; + + ret_value = request; + +done: + + /* if there is an error, clean up after ourselves + */ + if (ret_value == NULL) { + if (request != NULL) H5MM_xfree(request); + if (vrsn != NULL) H5MM_xfree(vrsn); + if (verb != NULL) H5MM_xfree(verb); + if (res != NULL) H5MM_xfree(res); + } + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_s3comms_hrb_init_request */ + + + +/**************************************************************************** + * S3R FUNCTIONS + ****************************************************************************/ + + +/*---------------------------------------------------------------------------- + * + * Function: H5FD_s3comms_s3r_close() + * + * Purpose: + * + * Close communications through given S3 Request Handle (`s3r_t`) + * and clean up associated resources. + * + * Return: + * + * - SUCCESS: `SUCCEED` + * - FAILURE: `FAIL` + * - fails if handle is null or has invalid magic number + * + * + * Programmer: Jacob Smith + * 2017-08-31 + * + * Changes: + * + * - Remove all messiness related to the now-gone "setopt" utility + * as it no longer exists in the handle. + * - Return type to `void`. + * --- Jacob Smith 2017-09-01 + * + * - Incorporate into HDF environment. + * - Rename from `s3r_close()` to `H5FD_s3comms_s3r_close()`. 
+ * --- Jacob Smith 2017-10-06 + * + * - Change separate host, resource, port info to `parsed_url_t` struct ptr. + * --- Jacob Smith 2017-11-01 + * + *---------------------------------------------------------------------------- + */ +herr_t +H5FD_s3comms_s3r_close(s3r_t *handle) +{ + herr_t ret_value = SUCCEED; + + + + FUNC_ENTER_NOAPI_NOINIT + +#ifdef H5_HAVE_ROS3_VFD +#if S3COMMS_DEBUG + HDfprintf(stdout, "called H5FD_s3comms_s3r_close.\n"); +#endif + + if (handle == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "handle cannot be null.\n"); + } + if (handle->magic != S3COMMS_S3R_MAGIC) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "handle has invalid magic.\n"); + } + + curl_easy_cleanup(handle->curlhandle); + + H5MM_xfree(handle->secret_id); + H5MM_xfree(handle->region); + H5MM_xfree(handle->signing_key); + + HDassert( handle->httpverb != NULL ); + H5MM_xfree(handle->httpverb); + + if (FAIL == H5FD_s3comms_free_purl(handle->purl)) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "unable to release parsed url structure") + } + + H5MM_xfree(handle); + +#endif /* H5_HAVE_ROS3_VFD */ + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_s3comms_s3r_close */ + + +/*---------------------------------------------------------------------------- + * + * Function: H5FD_s3comms_s3r_get_filesize() + * + * Purpose: + * + * Retrieve the filesize of an open request handle. + * + * Wrapper "getter" to hide implementation details. + * + * + * Return: + * + * - SUCCESS: size of file, in bytes, if handle is valid. + * - FAILURE: 0, if handle is NULL or undefined. 
+ * + * Programmer: Jacob Smith 2017-01-14 + * + * Changes: None + * + *---------------------------------------------------------------------------- + */ +size_t +H5FD_s3comms_s3r_get_filesize(s3r_t *handle) { + + size_t ret_value = 0; + + FUNC_ENTER_NOAPI_NOINIT_NOERR + +#ifdef H5_HAVE_ROS3_VFD + if (handle != NULL) + ret_value = handle->filesize; +#endif /* H5_HAVE_ROS3_VFD */ + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_s3comms_s3r_get_filesize */ + + +/*---------------------------------------------------------------------------- + * + * Function: H5FD_s3comms_s3r_getsize() + * + * Purpose: + * + * Get the number of bytes of handle's target resource. + * + * Sets handle and curlhandle with to enact an HTTP HEAD request on file, + * and parses received headers to extract "Content-Length" from response + * headers, storing file size at `handle->filesize`. + * + * Critical step in opening (initiating) an `s3r_t` handle. + * + * Wraps `s3r_read()`. + * Sets curlhandle to write headers to a temporary buffer (using extant + * write callback) and provides no buffer for body. + * + * Upon exit, unsets HTTP HEAD settings from curl handle, returning to + * initial state. In event of error, curl handle state is undefined and is + * not to be trusted. + * + * Return: + * + * - SUCCESS: `SUCCEED` + * - FAILURE: `FAIL` + * + * Programmer: Jacob Smith + * 2017-08-23 + * + * Changes: + * + * - Update to revised `s3r_t` format and life cycle. + * --- Jacob Smith 2017-09-01 + * + * - Conditional change to static header buffer and structure. + * --- Jacob Smith 2017-09-05 + * + * - Incorporate into HDF environment. + * - Rename from `s3r_getsize()` to `H5FD_s3comms_s3r_getsize()`. 
+ * --- Jacob Smith 2017-10-06 + * + *---------------------------------------------------------------------------- + */ +herr_t +H5FD_s3comms_s3r_getsize(s3r_t *handle) +{ +#ifdef H5_HAVE_ROS3_VFD + unsigned long int content_length = 0; + CURL *curlh = NULL; + char *end = NULL; + char *headerresponse = NULL; + herr_t ret_value = SUCCEED; + struct s3r_datastruct sds = { + S3COMMS_CALLBACK_DATASTRUCT_MAGIC, + NULL, + 0 }; + char *start = NULL; +#else + herr_t ret_value = FAIL; +#endif /* H5_HAVE_ROS3_VFD */ + + + + FUNC_ENTER_NOAPI_NOINIT + +#ifdef H5_HAVE_ROS3_VFD + +#if S3COMMS_DEBUG + HDfprintf(stdout, "called H5FD_s3comms_s3r_getsize.\n"); +#endif + + if (handle == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "handle cannot be null.\n"); + } + if (handle->magic != S3COMMS_S3R_MAGIC) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "handle has invalid magic.\n"); + } + if (handle->curlhandle == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "handle has bad (null) curlhandle.\n") + } + + /******************** + * PREPARE FOR HEAD * + ********************/ + + curlh = handle->curlhandle; + + if ( CURLE_OK != + curl_easy_setopt(curlh, + CURLOPT_NOBODY, + 1L) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "error while setting CURL option (CURLOPT_NOBODY). " + "(placeholder flags)"); + } + + if ( CURLE_OK != + curl_easy_setopt(curlh, + CURLOPT_HEADERDATA, + &sds) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "error while setting CURL option (CURLOPT_HEADERDATA). 
" + "(placeholder flags)"); + } + + HDassert( handle->httpverb == NULL ); + handle->httpverb = (char *)H5MM_malloc(sizeof(char) * 16); + if (handle->httpverb == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL, + "unable to allocate space for S3 request HTTP verb"); + } + HDmemcpy(handle->httpverb, "HEAD", 5); + + headerresponse = (char *)H5MM_malloc(sizeof(char) * CURL_MAX_HTTP_HEADER); + if (headerresponse == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL, + "unable to allocate space for curl header response"); + } + sds.data = headerresponse; + + /******************* + * PERFORM REQUEST * + *******************/ + + /* these parameters fetch the entire file, + * but, with a NULL destination and NOBODY and HEADERDATA supplied above, + * only http metadata will be sent by server and recorded by s3comms + */ + if (FAIL == + H5FD_s3comms_s3r_read(handle, 0, 0, NULL) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "problem in reading during getsize.\n"); + } + + if (sds.size > CURL_MAX_HTTP_HEADER) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "HTTP metadata buffer overrun\n"); + } else if (sds.size == 0) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "No HTTP metadata\n"); +#if S3COMMS_DEBUG + } else { + HDfprintf(stderr, "GETSIZE: OK\n"); +#endif + } + + + /****************** + * PARSE RESPONSE * + ******************/ + + start = strstr(headerresponse, + "\r\nContent-Length: "); + if (start == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "could not find \"Content-Length\" in response.\n"); + } + + /* move "start" to beginning of value in line; find end of line + */ + start = start + HDstrlen("\r\nContent-Length: "); + end = strstr(start, "\r\n"); + if (end == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "could not find end of content length line"); + } + + /* place null terminator at end of numbers + */ + *end = '\0'; + + content_length = strtoul((const char *)start, + NULL, + 0); + if (content_length == 0 || + content_length == 
ULONG_MAX || + errno == ERANGE) /* errno set by strtoul */ + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "could not convert found \"Content-Length\" response (\"%s\")", + start); /* range is null-terminated, remember */ + } + + handle->filesize = (size_t)content_length; + + /********************** + * UNDO HEAD SETTINGS * + **********************/ + + if ( CURLE_OK != + curl_easy_setopt(curlh, + CURLOPT_NOBODY, + 0) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "error while setting CURL option (CURLOPT_NOBODY). " + "(placeholder flags)"); + } + + if ( CURLE_OK != + curl_easy_setopt(curlh, + CURLOPT_HEADERDATA, + 0) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "error while setting CURL option (CURLOPT_HEADERDATA). " + "(placeholder flags)"); + } + +done: + H5MM_xfree(headerresponse); + sds.magic += 1; /* set to bad magic */ + +#endif /* H5_HAVE_ROS3_VFD */ + + FUNC_LEAVE_NOAPI(ret_value); + +} /* H5FD_s3comms_s3r_getsize */ + + +/*---------------------------------------------------------------------------- + * + * Function: H5FD_s3comms_s3r_open() + * + * Purpose: + * + * Logically 'open' a file hosted on S3. + * + * - create new Request Handle + * - copy supplied url + * - copy authentication info if supplied + * - create CURL handle + * - fetch size of file + * - connect with server and execute HEAD request + * - return request handle ready for reads + * + * To use 'default' port to connect, `port` should be 0. + * + * To prevent AWS4 authentication, pass null pointer to `region`, `id`, + * and `signing_key`. + * + * Uses `H5FD_s3comms_parse_url()` to validate and parse url input. + * + * Return: + * + * - SUCCESS: Pointer to new request handle. + * - FAILURE: NULL + * - occurs if: + * - authentication strings are inconsistent + * - must _all_ be null, or have at least `region` and `id` + * - url is NULL (no filename) + * - unable to parse url (malformed?) 
+ * - error while performing `getsize()` + * + * Programmer: Jacob Smith + * 2017-09-01 + * + * Changes: + * + * - Incorporate into HDF environment. + * - Rename from `s3r_open()` to `H5FD_s3comms_s3r_open()`. + * --- Jacob Smith 2017-10-06 + * + * - Remove port number from signature. + * - Name (`url`) must be complete url with http scheme and optional port + * number in string. + * - e.g., "http://bucket.aws.com:9000/myfile.dat?query=param" + * - Internal storage of host, resource, and port information moved into + * `parsed_url_t` struct pointer. + * --- Jacob Smith 2017-11-01 + * + *---------------------------------------------------------------------------- + */ +s3r_t * +H5FD_s3comms_s3r_open(const char *url, + const char *region, + const char *id, + const unsigned char *signing_key) +{ +#ifdef H5_HAVE_ROS3_VFD + size_t tmplen = 0; + CURL *curlh = NULL; + s3r_t *handle = NULL; + parsed_url_t *purl = NULL; +#endif + s3r_t *ret_value = NULL; + + + + FUNC_ENTER_NOAPI_NOINIT + +#ifdef H5_HAVE_ROS3_VFD + +#if S3COMMS_DEBUG + HDfprintf(stdout, "called H5FD_s3comms_s3r_open.\n"); +#endif + + + + if (url == NULL || url[0] == '\0') { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "url cannot be null.\n"); + } + + if (FAIL == H5FD_s3comms_parse_url(url, &purl)) { + /* probably a malformed url, but could be internal error */ + HGOTO_ERROR(H5E_ARGS, H5E_CANTCREATE, NULL, + "unable to create parsed url structure"); + } + HDassert( purl != NULL ); /* if above passes, this must be true */ + HDassert( purl->magic == S3COMMS_PARSED_URL_MAGIC ); + + handle = (s3r_t *)H5MM_malloc(sizeof(s3r_t)); + if (handle == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, NULL, + "could not malloc space for handle.\n"); + } + + handle->magic = S3COMMS_S3R_MAGIC; + handle->purl = purl; + handle->filesize = 0; + handle->region = NULL; + handle->secret_id = NULL; + handle->signing_key = NULL; + handle->httpverb = NULL; + + /************************************* + * RECORD AUTHENTICATION 
INFORMATION * + *************************************/ + + if ((region != NULL && *region != '\0') || + (id != NULL && *id != '\0') || + (signing_key != NULL && *signing_key != '\0')) + { + /* if one exists, all three must exist + */ + if (region == NULL || region[0] == '\0') { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "region cannot be null.\n"); + } + if (id == NULL || id[0] == '\0') { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "secret id cannot be null.\n"); + } + if (signing_key == NULL || signing_key[0] == '\0') { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "signing key cannot be null.\n"); + } + + /* copy strings + */ + tmplen = HDstrlen(region) + 1; + handle->region = (char *)H5MM_malloc(sizeof(char) * tmplen); + if (handle->region == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "could not malloc space for handle region copy.\n"); + } + HDmemcpy(handle->region, region, tmplen); + + tmplen = HDstrlen(id) + 1; + handle->secret_id = (char *)H5MM_malloc(sizeof(char) * tmplen); + if (handle->secret_id == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "could not malloc space for handle ID copy.\n"); + } + HDmemcpy(handle->secret_id, id, tmplen); + + tmplen = SHA256_DIGEST_LENGTH; + handle->signing_key = + (unsigned char *)H5MM_malloc(sizeof(unsigned char) * tmplen); + if (handle->signing_key == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "could not malloc space for handle key copy.\n"); + } + HDmemcpy(handle->signing_key, signing_key, tmplen); + } /* if authentication information provided */ + + /************************ + * INITIATE CURL HANDLE * + ************************/ + + curlh = curl_easy_init(); + + if (curlh == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "problem creating curl easy handle!\n"); + } + + if ( CURLE_OK != + curl_easy_setopt(curlh, + CURLOPT_HTTPGET, + 1L) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "error while setting CURL option (CURLOPT_HTTPGET). 
" + "(placeholder flags)"); + } + + if ( CURLE_OK != + curl_easy_setopt(curlh, + CURLOPT_HTTP_VERSION, + CURL_HTTP_VERSION_1_1) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "error while setting CURL option (CURLOPT_HTTP_VERSION). " + "(placeholder flags)"); + } + + if ( CURLE_OK != + curl_easy_setopt(curlh, + CURLOPT_FAILONERROR, + 1L) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "error while setting CURL option (CURLOPT_FAILONERROR). " + "(placeholder flags)"); + } + + if ( CURLE_OK != + curl_easy_setopt(curlh, + CURLOPT_WRITEFUNCTION, + curlwritecallback) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "error while setting CURL option (CURLOPT_WRITEFUNCTION). " + "(placeholder flags)"); + } + + if ( CURLE_OK != + curl_easy_setopt(curlh, + CURLOPT_URL, + url) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "error while setting CURL option (CURLOPT_URL). " + "(placeholder flags)"); + } + +#if S3COMMS_CURL_VERBOSITY > 1 + /* CURL will print (to stdout) information for each operation + */ + curl_easy_setopt(curlh, CURLOPT_VERBOSE, 1L); +#endif + + handle->curlhandle = curlh; + + /******************* + * OPEN CONNECTION * + * * * * * * * * * * + * GET FILE SIZE * + *******************/ + + if (FAIL == + H5FD_s3comms_s3r_getsize(handle) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "problem in H5FD_s3comms_s3r_getsize.\n"); + } + + /********************* + * FINAL PREPARATION * + *********************/ + + HDassert( handle->httpverb != NULL ); + HDmemcpy(handle->httpverb, "GET", 4); + + ret_value = handle; +#endif /* H5_HAVE_ROS3_VFD */ + +done: + if (ret_value == NULL) { +#ifdef H5_HAVE_ROS3_VFD + if (curlh != NULL) { + curl_easy_cleanup(curlh); + } + if (FAIL == H5FD_s3comms_free_purl(purl)) { + HDONE_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, + "unable to free parsed url structure") + } + if (handle != NULL) { + H5MM_xfree(handle->region); + H5MM_xfree(handle->secret_id); + H5MM_xfree(handle->signing_key); + if (handle->httpverb != NULL) { + 
H5MM_xfree(handle->httpverb); + } + H5MM_xfree(handle); + } +#endif /* H5_HAVE_ROS3_VFD */ + } + + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_s3comms_s3r_open */ + + +/*---------------------------------------------------------------------------- + * + * Function: H5FD_s3comms_s3r_read() + * + * Purpose: + * + * Read file pointed to by request handle, writing specified + * `offset` .. `offset + len` bytes to buffer `dest`. + * + * If `len` is 0, reads entirety of file starting at `offset`. + * If `offset` and `len` are both 0, reads entire file. + * + * If `offset` or `offset+len` is greater than the file size, read is + * aborted and returns `FAIL`. + * + * Uses configured "curl easy handle" to perform request. + * + * In event of error, buffer should remain unaltered. + * + * If handle is set to authorize a request, creates a new (temporary) + * HTTP Request object (hrb_t) for generating requisite headers, + * which is then translated to a `curl slist` and set in the curl handle + * for the request. + * + * `dest` _may_ be NULL, but no body data will be recorded. + * + * - In general practice, NULL should never be passed in as `dest`. + * - NULL `dest` passed in by internal function `s3r_getsize()`, in + * conjunction with CURLOPT_NOBODY to preempt transmission of file data + * from server. + * + * Return: + * + * - SUCCESS: `SUCCEED` + * - FAILURE: `FAIL` + * + * Programmer: Jacob Smith + * 2017-08-22 + * + * Changes: + * + * - Revise structure to prevent unnecessary hrb_t element creation. + * - Rename tmprstr -> rangebytesstr to reflect purpose. + * - Insert needed `free()`s, particularly for `sds`. + * --- Jacob Smith 2017-08-23 + * + * - Revise heavily to accept buffer, range as parameters. + * - Utilize modified s3r_t format. + * --- Jacob Smith 2017-08-31 + * + * - Incorporate into HDF library. + * - Rename from `s3r_read()` to `H5FD_s3comms_s3r_read()`. + * - Return `herr_t` succeed/fail instead of S3code. 
+ * - Update to use revised `hrb_t` and `hrb_node_t` structures. + * --- Jacob Smith 2017-10-06 + * + * - Update to use `parsed_url_t *purl` in handle. + * --- Jacob Smith 2017-11-01 + * + * - Better define behavior upon read past EOF + * --- Jacob Smith 2017-01-19 + * + *---------------------------------------------------------------------------- + */ +herr_t +H5FD_s3comms_s3r_read(s3r_t *handle, + haddr_t offset, + size_t len, + void *dest) +{ +#ifdef H5_HAVE_ROS3_VFD + CURL *curlh = NULL; + CURLcode p_status = CURLE_OK; + struct curl_slist *curlheaders = NULL; + hrb_node_t *headers = NULL; + hrb_node_t *node = NULL; + struct tm *now = NULL; + char *rangebytesstr = NULL; + hrb_t *request = NULL; + int ret = 0; /* working variable to check */ + /* return value of HDsnprintf */ + struct s3r_datastruct *sds = NULL; + herr_t ret_value = SUCCEED; +#else + herr_t ret_value = FAIL; +#endif /* H5_HAVE_ROS3_VFD */ + + + + FUNC_ENTER_NOAPI_NOINIT + +#ifdef H5_HAVE_ROS3_VFD + +#if S3COMMS_DEBUG + HDfprintf(stdout, "called H5FD_s3comms_s3r_read.\n"); +#endif + + /************************************** + * ABSOLUTELY NECESSARY SANITY-CHECKS * + **************************************/ + + if (handle == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "handle cannot be null.\n"); + } + if (handle->magic != S3COMMS_S3R_MAGIC) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "handle has invalid magic.\n"); + } + if (handle->curlhandle == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "handle has bad (null) curlhandle.\n") + } + if (handle->purl == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "handle has bad (null) url.\n") + } + HDassert( handle->purl->magic == S3COMMS_PARSED_URL_MAGIC ); + if (offset > handle->filesize || (len + offset) > handle->filesize) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "unable to read past EoF") + } + + curlh = handle->curlhandle; + + /********************* + * PREPARE WRITEDATA * + *********************/ + + if (dest != 
NULL) { + sds = (struct s3r_datastruct *)H5MM_malloc( + sizeof(struct s3r_datastruct)); + if (sds == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL, + "could not malloc destination datastructure.\n"); + } + + sds->magic = S3COMMS_CALLBACK_DATASTRUCT_MAGIC; + sds->data = (char *)dest; + sds->size = 0; + if (CURLE_OK != + curl_easy_setopt(curlh, + CURLOPT_WRITEDATA, + sds) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_UNINITIALIZED, FAIL, + "error while setting CURL option (CURLOPT_WRITEDATA). " + "(placeholder flags)"); + } + } + + /********************* + * FORMAT HTTP RANGE * + *********************/ + + if (len > 0) { + rangebytesstr = (char *)H5MM_malloc(sizeof(char) * \ + S3COMMS_MAX_RANGE_STRING_SIZE ); + if (rangebytesstr == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL, + "could not malloc range format string.\n"); + } + ret = HDsnprintf(rangebytesstr, + (S3COMMS_MAX_RANGE_STRING_SIZE), + "bytes="H5_PRINTF_HADDR_FMT"-"H5_PRINTF_HADDR_FMT, + offset, + offset + len - 1); + if (ret == 0 || ret >= S3COMMS_MAX_RANGE_STRING_SIZE) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "unable to format HTTP Range value"); + } else if (offset > 0) { + rangebytesstr = (char *)H5MM_malloc(sizeof(char) * \ + S3COMMS_MAX_RANGE_STRING_SIZE); + if (rangebytesstr == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL, + "could not malloc range format string.\n"); + } + ret = HDsnprintf(rangebytesstr, + (S3COMMS_MAX_RANGE_STRING_SIZE), + "bytes="H5_PRINTF_HADDR_FMT"-", + offset); + if (ret == 0 || ret >= S3COMMS_MAX_RANGE_STRING_SIZE) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "unable to format HTTP Range value"); + } + + /******************* + * COMPILE REQUEST * + *******************/ + + if (handle->signing_key == NULL) { + /* Do not authenticate. 
+ */ + if (rangebytesstr != NULL) { + /* Pass in range directly + */ + char *bytesrange_ptr = NULL; /* pointer past "bytes=" portion */ + + bytesrange_ptr = strchr(rangebytesstr, '='); + HDassert( bytesrange_ptr != NULL ); + bytesrange_ptr++; /* move to first char past '=' */ + HDassert( *bytesrange_ptr != '\0' ); + + if (CURLE_OK != + curl_easy_setopt(curlh, + CURLOPT_RANGE, + bytesrange_ptr) ) + { + HGOTO_ERROR(H5E_VFL, H5E_UNINITIALIZED, FAIL, + "error while setting CURL option (CURLOPT_RANGE). "); + } + } + } else { + /* authenticate request + */ + char authorization[512]; + /* 512 := approximate max length... + * 67 <len("AWS4-HMAC-SHA256 Credential=///s3/aws4_request," + * "SignedHeaders=,Signature=")> + * + 8 <yyyyMMDD> + * + 64 <hex(sha256())> + * + 128 <max? len(secret_id)> + * + 20 <max? len(region)> + * + 128 <max? len(signed_headers)> + */ + char buffer1[512]; /* -> Canonical Request -> Signature */ + char buffer2[256]; /* -> String To Sign -> Credential */ + char iso8601now[ISO8601_SIZE]; + char signed_headers[48]; + /* should be large enough for nominal listing: + * "host;range;x-amz-content-sha256;x-amz-date" + * + '\0', with "range;" possibly absent + */ + + /* zero start of strings */ + authorization[0] = 0; + buffer1[0] = 0; + buffer2[0] = 0; + iso8601now[0] = 0; + signed_headers[0] = 0; + + /**** VERIFY INFORMATION EXISTS ****/ + + if (handle->region == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "handle must have non-null region.\n"); + } + if (handle->secret_id == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "handle must have non-null secret_id.\n"); + } + if (handle->signing_key == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "handle must have non-null signing_key.\n"); + } + if (handle->httpverb == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "handle must have non-null httpverb.\n"); + } + if (handle->purl->host == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "handle must have non-null host.\n"); 
+ } + if (handle->purl->path == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "handle must have non-null resource.\n"); + } + + /**** CREATE HTTP REQUEST STRUCTURE (hrb_t) ****/ + + request = H5FD_s3comms_hrb_init_request( + (const char *)handle->httpverb, + (const char *)handle->purl->path, + "HTTP/1.1"); + if (request == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "could not allocate hrb_t request.\n"); + } + HDassert( request->magic == S3COMMS_HRB_MAGIC ); + + now = gmnow(); + if (ISO8601NOW(iso8601now, now) != (ISO8601_SIZE - 1)) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "could not format ISO8601 time.\n"); + } + + if (FAIL == + H5FD_s3comms_hrb_node_set( + &headers, + "x-amz-date", + (const char *)iso8601now) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "unable to set x-amz-date header") + } + if (headers == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "problem building headers list. " + "(placeholder flags)\n"); + } + HDassert( headers->magic == S3COMMS_HRB_NODE_MAGIC ); + + if (FAIL == + H5FD_s3comms_hrb_node_set( + &headers, + "x-amz-content-sha256", + (const char *)EMPTY_SHA256) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "unable to set x-amz-content-sha256 header") + } + if (headers == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "problem building headers list. " + "(placeholder flags)\n"); + } + HDassert( headers->magic == S3COMMS_HRB_NODE_MAGIC ); + + if (rangebytesstr != NULL) { + if (FAIL == + H5FD_s3comms_hrb_node_set( + &headers, + "Range", + (const char *)rangebytesstr) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "unable to set range header") + } + if (headers == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "problem building headers list. 
" + "(placeholder flags)\n"); + } + HDassert( headers->magic == S3COMMS_HRB_NODE_MAGIC ); + } + + if (FAIL == + H5FD_s3comms_hrb_node_set( + &headers, + "Host", + (const char *)handle->purl->host) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "unable to set host header") + } + if (headers == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "problem building headers list. " + "(placeholder flags)\n"); + } + HDassert( headers->magic == S3COMMS_HRB_NODE_MAGIC ); + + request->first_header = headers; + + /**** COMPUTE AUTHORIZATION ****/ + + if (FAIL == /* buffer1 -> canonical request */ + H5FD_s3comms_aws_canonical_request(buffer1, + signed_headers, + request) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "(placeholder flags)\n"); + } + if ( FAIL == /* buffer2->string-to-sign */ + H5FD_s3comms_tostringtosign(buffer2, + buffer1, + iso8601now, + handle->region) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "(placeholder flags)\n"); + } + if (FAIL == /* buffer1 -> signature */ + H5FD_s3comms_HMAC_SHA256(handle->signing_key, + SHA256_DIGEST_LENGTH, + buffer2, + HDstrlen(buffer2), + buffer1) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "(placeholder flags)\n"); + } + + iso8601now[8] = 0; /* trim to yyyyMMDD */ + ret = S3COMMS_FORMAT_CREDENTIAL(buffer2, + handle->secret_id, + iso8601now, + handle->region, + "s3"); + if (ret == 0 || ret >= S3COMMS_MAX_CREDENTIAL_SIZE) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "unable to format aws4 credential string"); + + ret = HDsnprintf(authorization, + 512, + "AWS4-HMAC-SHA256 Credential=%s,SignedHeaders=%s,Signature=%s", + buffer2, + signed_headers, + buffer1); + if (ret == 0 || ret >= 512) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "unable to format aws4 authorization string"); + + /* append authorization header to http request buffer + */ + if (FAIL == + H5FD_s3comms_hrb_node_set( + &headers, + "Authorization", + (const char *)authorization) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + 
"unable to set Authorization header") + } + if (headers == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "problem building headers list. " + "(placeholder flags)\n"); + } + + /* update hrb's "first header" pointer + */ + request->first_header = headers; + + /**** SET CURLHANDLE HTTP HEADERS FROM GENERATED DATA ****/ + + node = request->first_header; + while (node != NULL) { + HDassert( node->magic == S3COMMS_HRB_NODE_MAGIC ); + curlheaders = curl_slist_append(curlheaders, + (const char *)node->cat); + if (curlheaders == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "could not append header to curl slist. " + "(placeholder flags)\n"); + } + node = node->next; + } + + /* sanity-check + */ + if (curlheaders == NULL) { + /* above loop was probably never run */ + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "curlheaders was never populated.\n"); + } + + /* finally, set http headers in curl handle + */ + if (CURLE_OK != + curl_easy_setopt(curlh, + CURLOPT_HTTPHEADER, + curlheaders) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "error while setting CURL option " + "(CURLOPT_HTTPHEADER). (placeholder flags)"); + } + + } /* if should authenticate (info provided) */ + + /******************* + * PERFORM REQUEST * + *******************/ + +#if S3COMMS_CURL_VERBOSITY > 0 + /* In event of error, print detailed information to stderr + * This is not the default behavior. 
+ */ + { + long int httpcode = 0; + char curlerrbuf[CURL_ERROR_SIZE]; + curlerrbuf[0] = '\0'; + + if (CURLE_OK != + curl_easy_setopt(curlh, CURLOPT_ERRORBUFFER, curlerrbuf) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "problem setting error buffer") + } + + p_status = curl_easy_perform(curlh); + + if (p_status != CURLE_OK) { + if (CURLE_OK != + curl_easy_getinfo(curlh, CURLINFO_RESPONSE_CODE, &httpcode) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "problem getting response code") + } + HDfprintf(stderr, "CURL ERROR CODE: %d\nHTTP CODE: %d\n", + p_status, httpcode); + HDfprintf(stderr, "%s\n", curl_easy_strerror(p_status)); + HGOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL, + "problem while performing request.\n"); + } + if (CURLE_OK != + curl_easy_setopt(curlh, CURLOPT_ERRORBUFFER, NULL) ) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "problem unsetting error buffer") + } + } /* verbose error reporting */ +#else + p_status = curl_easy_perform(curlh); + + if (p_status != CURLE_OK) { + HGOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL, + "curl cannot perform request\n") + } +#endif + +#if S3COMMS_DEBUG + if (dest != NULL) { + HDfprintf(stderr, "len: %d\n", (int)len); + HDfprintf(stderr, "CHECKING FOR BUFFER OVERFLOW\n"); + if (sds == NULL) { + HDfprintf(stderr, "sds is NULL!\n"); + } else { + HDfprintf(stderr, "sds: 0x%lx\n", (long long)sds); + HDfprintf(stderr, "sds->size: %d\n", (int)sds->size); + if (len > sds->size) { + HDfprintf(stderr, "buffer overwrite\n"); + } + } + } else { + HDfprintf(stderr, "performed on entire file\n"); + } +#endif + +done: + /* clean any malloc'd resources + */ + if (curlheaders != NULL) { + curl_slist_free_all(curlheaders); + curlheaders = NULL; + } + if (rangebytesstr != NULL) { + H5MM_xfree(rangebytesstr); + rangebytesstr = NULL; + } + if (sds != NULL) { + H5MM_xfree(sds); + sds = NULL; + } + if (request != NULL) { + while (headers != NULL) + if (FAIL == + H5FD_s3comms_hrb_node_set(&headers, headers->name, NULL)) + { + 
HDONE_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "cannot release header node") + } + HDassert( NULL == headers ); + if (FAIL == H5FD_s3comms_hrb_destroy(&request)) { + HDONE_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "cannot release header request structure") + } + HDassert( NULL == request ); + } + + if (curlh != NULL) { + /* clear any Range */ + if (CURLE_OK != curl_easy_setopt(curlh, CURLOPT_RANGE, NULL) ) + HDONE_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "cannot unset CURLOPT_RANGE") + + /* clear headers */ + if (CURLE_OK != curl_easy_setopt(curlh, CURLOPT_HTTPHEADER, NULL) ) + HDONE_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "cannot unset CURLOPT_HTTPHEADER") + } + +#endif /* H5_HAVE_ROS3_VFD */ + + FUNC_LEAVE_NOAPI(ret_value); + +} /* H5FD_s3comms_s3r_read */ + + + +/**************************************************************************** + * MISCELLANEOUS FUNCTIONS + ****************************************************************************/ + + +/*---------------------------------------------------------------------------- + * + * Function: gmnow() + * + * Purpose: + * + * Get the output of `time.h`'s `gmtime()` call while minimizing setup + * clutter where important. + * + * Return: + * + * Pointer to resulting `struct tm`,as created by gmtime(time_t * T). + * + * Programmer: Jacob Smith + * 2017-07-12 + * + * Changes: None. 
+ * + *---------------------------------------------------------------------------- + */ +struct tm * +gmnow(void) +{ + time_t now; + time_t *now_ptr = &now; + struct tm *ret_value = NULL; + + /* Doctor assert, checks against error in time() */ + if ( (time_t)(-1) != time(now_ptr) ) + ret_value = gmtime(now_ptr); + + HDassert( ret_value != NULL ); + + return ret_value; + +} /* gmnow */ + + +/*---------------------------------------------------------------------------- + * + * Function: H5FD_s3comms_aws_canonical_request() + * + * Purpose: + * + * Compose AWS "Canonical Request" (and signed headers string) + * as defined in the REST API documentation. + * + * Both destination strings are null-terminated. + * + * Destination string arguments must be provided with adequate space. + * + * Canonical Request format: + * + * <HTTP VERB>"\n" + * <resource path>"\n" + * <query string>"\n" + * <header1>"\n" (`lowercase(name)`":"`trim(value)`) + * <header2>"\n" + * ... (headers sorted by name) + * <header_n>"\n" + * "\n" + * <signed headers>"\n" (`lowercase(header 1 name)`";"`header 2 name`;...) + * <hex-string of sha256sum of body> ("e3b0c4429...", e.g.) + * + * Return: + * + * - SUCCESS: `SUCCEED` + * - writes canonical request to respective `...dest` strings + * - FAILURE: `FAIL` + * - one or more input argument was NULL + * - internal error + * + * Programmer: Jacob Smith + * 2017-10-04 + * + * Changes: None. + * + *---------------------------------------------------------------------------- + */ +herr_t +H5FD_s3comms_aws_canonical_request(char *canonical_request_dest, + char *signed_headers_dest, + hrb_t *http_request) +{ + hrb_node_t *node = NULL; + const char *query_params = ""; /* unused at present */ + herr_t ret_value = SUCCEED; + int ret = 0; /* return value of HDsnprintf */ + size_t len = 0; /* working string length variable */ + char tmpstr[256]; + + /* "query params" refers to the optional element in the URL, e.g. 
+ * http://bucket.aws.com/myfile.txt?max-keys=2&prefix=J + * ^-----------------^ + * + * Not handled/implemented as of 2017-10-xx. + * Element introduced as empty placeholder and reminder. + * Further research to be done if this is ever relevant for the + * VFD use-cases. + */ + + + + FUNC_ENTER_NOAPI_NOINIT + +#if S3COMMS_DEBUG + HDfprintf(stdout, "called H5FD_s3comms_aws_canonical_request.\n"); +#endif + + if (http_request == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "hrb object cannot be null.\n"); + } + HDassert( http_request->magic == S3COMMS_HRB_MAGIC ); + + if (canonical_request_dest == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "canonical request destination cannot be null.\n"); + } + + if (signed_headers_dest == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "signed headers destination cannot be null.\n"); + } + + /* HTTP verb, resource path, and query string lines + */ + len = (HDstrlen(http_request->verb) + + HDstrlen(http_request->resource) + + HDstrlen(query_params) + + 3 ); + ret = HDsnprintf(canonical_request_dest, + len + 1, + "%s\n%s\n%s\n", + http_request->verb, + http_request->resource, + query_params); + if (ret == 0 || (size_t)ret > len) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "unable to compose canonical request first line"); + + /* write in canonical headers, building signed headers concurrently + */ + node = http_request->first_header; /* assumed at first sorted */ + while (node != NULL) { + size_t join_len = 0; /* string len of joined header-value */ + + HDassert( node->magic == S3COMMS_HRB_NODE_MAGIC ); + + len = HDstrlen(node->lowername); + join_len = HDstrlen(node->value) + len + 2; /* +2 <- ":\n" */ + ret = HDsnprintf(tmpstr, + join_len + 1, /* +1 for null terminator */ + "%s:%s\n", + node->lowername, + node->value); + if (ret == 0 || (size_t)ret > join_len) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "unable to concatenate HTTP header %s:%s", + node->lowername, + node->value); + 
strcat(canonical_request_dest, tmpstr); + + len += 1; /* semicolon */ + ret = HDsnprintf(tmpstr, + len + 1, + "%s;", + node->lowername); + if (ret == 0 || (size_t)ret > len) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "unable to append signed header %s", + node->lowername); + strcat(signed_headers_dest, tmpstr); + + node = node->next; + } + + /* remove tailing ';' from signed headers sequence + */ + signed_headers_dest[HDstrlen(signed_headers_dest) - 1] = '\0'; + + /* append signed headers and payload hash + * NOTE: at present, no HTTP body is handled, per the nature of + * requests/range-gets + */ + strcat(canonical_request_dest, "\n"); + strcat(canonical_request_dest, signed_headers_dest); + strcat(canonical_request_dest, "\n"); + strcat(canonical_request_dest, EMPTY_SHA256); + +done: + FUNC_LEAVE_NOAPI(ret_value); + +} /* H5FD_s3comms_aws_canonical_request */ + + +/*---------------------------------------------------------------------------- + * + * Function: H5FD_s3comms_bytes_to_hex() + * + * Purpose: + * + * Produce human-readable hex string [0-9A-F] from sequence of bytes. + * + * For each byte (char), writes two-character hexadecimal representation. + * + * No null-terminator appended. + * + * Assumes `dest` is allocated to enough size (msg_len * 2). + * + * Fails if either `dest` or `msg` are null. + * + * `msg_len` message length of 0 has no effect. + * + * Return: + * + * - SUCCESS: `SUCCEED` + * - hex string written to `dest` (not null-terminated) + * - FAILURE: `FAIL` + * - `dest == NULL` + * - `msg == NULL` + * + * Programmer: Jacob Smith + * 2017-07-12 + * + * Changes: + * + * - Integrate into HDF. + * - Rename from hex() to H5FD_s3comms_bytes_to_hex. + * - Change return type from `void` to `herr_t`. + * --- Jacob Smtih 2017-09-14 + * + * - Add bool parameter `lowercase` to configure upper/lowercase output + * of a-f hex characters. 
+ * --- Jacob Smith 2017-09-19 + * + * - Change bool type to `hbool_t` + * --- Jacob Smtih 2017-10-11 + * + *---------------------------------------------------------------------------- + */ +herr_t +H5FD_s3comms_bytes_to_hex(char *dest, + const unsigned char *msg, + size_t msg_len, + hbool_t lowercase) +{ + size_t i = 0; + herr_t ret_value = SUCCEED; + + + + FUNC_ENTER_NOAPI_NOINIT + +#if S3COMMS_DEBUG + HDfprintf(stdout, "called H5FD_s3comms_bytes_to_hex.\n"); +#endif + + if (dest == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "hex destination cannot be null.\n") + } + if (msg == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "bytes sequence cannot be null.\n") + } + + for (i = 0; i < msg_len; i++) { + int chars_written = + HDsnprintf(&(dest[i * 2]), + 3, /* 'X', 'X', '\n' */ + (lowercase == TRUE) ? "%02x" + : "%02X", + msg[i]); + if (chars_written != 2) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "problem while writing hex chars for %c", + msg[i]); + } + +done: + FUNC_LEAVE_NOAPI(ret_value); + +} /* H5FD_s3comms_bytes_to_hex */ + + +/*---------------------------------------------------------------------------- + * + * Function: H5FD_s3comms_free_purl() + * + * Purpose: + * + * Release resources from a parsed_url_t pointer. + * + * If pointer is null, nothing happens. + * + * Return: + * + * `SUCCEED` (never fails) + * + * Programmer: Jacob Smith + * 2017-11-01 + * + * Changes: None. 
+ * + *---------------------------------------------------------------------------- + */ +herr_t +H5FD_s3comms_free_purl(parsed_url_t *purl) +{ + FUNC_ENTER_NOAPI_NOINIT_NOERR + +#if S3COMMS_DEBUG + HDprintf("called H5FD_s3comms_free_purl.\n"); +#endif + + if (purl != NULL) { + HDassert( purl->magic == S3COMMS_PARSED_URL_MAGIC ); + if (purl->scheme != NULL) H5MM_xfree(purl->scheme); + if (purl->host != NULL) H5MM_xfree(purl->host); + if (purl->port != NULL) H5MM_xfree(purl->port); + if (purl->path != NULL) H5MM_xfree(purl->path); + if (purl->query != NULL) H5MM_xfree(purl->query); + purl->magic += 1ul; + H5MM_xfree(purl); + } + + FUNC_LEAVE_NOAPI(SUCCEED) +} /* H5FD_s3comms_free_purl */ + + +/*---------------------------------------------------------------------------- + * + * Function: H5FD_s3comms_HMAC_SHA256() + * + * Purpose: + * + * Generate Hash-based Message Authentication Checksum using the SHA-256 + * hashing algorithm. + * + * Given a key, message, and respective lengths (to accommodate null + * characters in either), generate _hex string_ of authentication checksum + * and write to `dest`. + * + * `dest` must be at least `SHA256_DIGEST_LENGTH * 2` characters in size. + * Not enforceable by this function. + * `dest` will _not_ be null-terminated by this function. + * + * Return: + * + * - SUCCESS: `SUCCEED` + * - hex string written to `dest` (not null-terminated) + * - FAILURE: `FAIL` + * - `dest == NULL` + * - error while generating hex string output + * + * Programmer: Jacob Smith + * 2017-07-?? + * + * Changes: + * + * - Integrate with HDF5. + * - Rename from `HMAC_SHA256` to `H5FD_s3comms_HMAC_SHA256`. + * - Rename output parameter from `md` to `dest`. + * - Return `herr_t` type instead of `void`. + * - Call `H5FD_s3comms_bytes_to_hex` to generate hex cleartext for output. 
 *
 *----------------------------------------------------------------------------
 */
herr_t
H5FD_s3comms_HMAC_SHA256(const unsigned char *key,
                         size_t key_len,
                         const char *msg,
                         size_t msg_len,
                         char *dest)
{
#ifdef H5_HAVE_ROS3_VFD
    unsigned char md[SHA256_DIGEST_LENGTH];
    unsigned int  md_len = SHA256_DIGEST_LENGTH;
    herr_t        ret_value = SUCCEED;
#else
    /* without ROS3 support this function is a stub that always fails */
    herr_t        ret_value = FAIL;
#endif /* H5_HAVE_ROS3_VFD */

    FUNC_ENTER_NOAPI_NOINIT

#ifdef H5_HAVE_ROS3_VFD

#if S3COMMS_DEBUG
    HDfprintf(stdout, "called H5FD_s3comms_HMAC_SHA256.\n");
#endif

    if (dest == NULL) {
        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
                    "destination cannot be null.");
    }

    /* compute raw HMAC-SHA256 digest; `md_len` receives the number of
     * digest bytes actually written by HMAC()
     */
    HMAC(EVP_sha256(),
         key,
         (int)key_len,
         (const unsigned char *)msg,
         msg_len,
         md,
         &md_len);

    /* convert raw digest to lowercase hex cleartext in `dest`;
     * output is NOT null-terminated -- caller must provide
     * at least SHA256_DIGEST_LENGTH * 2 characters of space
     */
    if (FAIL ==
        H5FD_s3comms_bytes_to_hex(dest,
                                  (const unsigned char *)md,
                                  (size_t)md_len,
                                  true))
    {
        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
                    "could not convert to hex string.");
    }

#endif /* H5_HAVE_ROS3_VFD */

done:
    FUNC_LEAVE_NOAPI(ret_value);

} /* H5FD_s3comms_HMAC_SHA256 */


/*-----------------------------------------------------------------------------
 *
 * Function: H5FD__s3comms_load_aws_creds_from_file()
 *
 * Purpose:
 *
 *     Extract AWS configuration information from a target file.
 *
 *     Given a file and a profile name, e.g. "ros3_vfd_test", attempt to
 *     locate that region in the file. If not found, returns in error and
 *     output pointers are not modified.
+ * + * If the profile label is found, attempts to locate and parse configuration + * data, stopping at the first line where: + * + reached end of file + * + line does not start with a recognized setting name + * + * Following AWS documentation, looks for any of: + * + aws_access_key_id + * + aws_secret_access_key + * + region + * + * To be valid, the setting must begin the line with one of the keywords, + * followed immediately by an equals sign '=', and have some data before + * newline at end of line. + * + `spam=eggs` would be INVALID because name is unrecognized + * + `region = us-east-2` would be INVALID because of spaces + * + `region=` would be INVALID because no data. + * + * Upon successful parsing of a setting line, will store the result in the + * corresponding output pointer. If the output pointer is NULL, will skip + * any matching setting line while parsing -- useful to prevent overwrite + * when reading from multiple files. + * + * Return: + * + * + SUCCESS: `SUCCEED` + * + no error. settings may or may not have been loaded. + * + FAILURE: `FAIL` + * + internal error occurred. 
+ * + -1 :: unable to format profile label + * + -2 :: profile name/label not found in file + * + -3 :: some other error + * + * Programmer: Jacob Smith + * 2018-02-27 + * + * Changes: None + * + *----------------------------------------------------------------------------- + */ +static herr_t +H5FD__s3comms_load_aws_creds_from_file( + FILE *file, + const char *profile_name, + char *key_id, + char *access_key, + char *aws_region) +{ + char profile_line[32]; + char buffer[128]; + const char *setting_names[] = { + "region", + "aws_access_key_id", + "aws_secret_access_key", + }; + char * const setting_pointers[] = { + aws_region, + key_id, + access_key, + }; + unsigned setting_count = 3; + herr_t ret_value = SUCCEED; + unsigned buffer_i = 0; + unsigned setting_i = 0; + int found_setting = 0; + char *line_buffer = &(buffer[0]); + + FUNC_ENTER_NOAPI_NOINIT + +#if S3COMMS_DEBUG + HDfprintf(stdout, "called load_aws_creds_from_file.\n"); +#endif + + /* format target line for start of profile */ + if (32 < HDsnprintf(profile_line, 32, "[%s]", profile_name)) + HGOTO_ERROR(H5E_ARGS, H5E_CANTCOPY, FAIL, + "unable to format profile label") + + /* look for start of profile */ + do { + /* clear buffer */ + for (buffer_i=0; buffer_i < 128; buffer_i++) buffer[buffer_i] = 0; + + line_buffer = fgets(line_buffer, 128, file); + if (line_buffer == NULL) /* reached end of file */ + goto done; + } while (strncmp(line_buffer, profile_line, HDstrlen(profile_line))); + + /* extract credentials from lines */ + do { + size_t setting_name_len = 0; + const char *setting_name = NULL; + char line_prefix[128]; + + /* clear buffer */ + for (buffer_i=0; buffer_i < 128; buffer_i++) buffer[buffer_i] = 0; + + /* collect a line from file */ + line_buffer = fgets(line_buffer, 128, file); + if (line_buffer == NULL) + goto done; /* end of file */ + + /* loop over names to see if line looks like assignment */ + for (setting_i = 0; setting_i < setting_count; setting_i++) { + setting_name = 
setting_names[setting_i]; + setting_name_len = HDstrlen(setting_name); + if (128 < HDsnprintf( + line_prefix, + setting_name_len+2, + "%s=", + setting_name)) + HGOTO_ERROR(H5E_ARGS, H5E_CANTCOPY, FAIL, + "unable to format line prefix") + + /* found a matching name? */ + if (!strncmp(line_buffer, line_prefix, setting_name_len + 1)) { + found_setting = 1; + + /* skip NULL destination buffer */ + if (setting_pointers[setting_i] == NULL) + break; + + /* advance to end fo name in string */ + do { + line_buffer++; + } while (*line_buffer != 0 && *line_buffer != '='); + + if (*line_buffer == 0 || *(line_buffer+1) == 0) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "incomplete assignment in file") + line_buffer++; /* was pointing at '='; advance */ + + /* copy line buffer into out pointer */ + strcpy(setting_pointers[setting_i], (const char *)line_buffer); + + /* "trim" tailing whitespace by replacing with null terminator*/ + buffer_i = 0; + while (!isspace(setting_pointers[setting_i][buffer_i])) + buffer_i++; + setting_pointers[setting_i][buffer_i] = '\0'; + + break; /* have read setting; don't compare with others */ + } + } + } while (found_setting); + +done: + FUNC_LEAVE_NOAPI(ret_value); + +} /* H5FD__s3comms_load_aws_creds_from_file */ + + +/*---------------------------------------------------------------------------- + * + * Function: H5FD_s3comms_load_aws_profile() + * + * Purpose : + * + * Read aws profile elements from standard location on system and store + * settings in memory. + * + * Looks for both `~/.aws/config` and `~/.aws/credentials`, the standard + * files for AWS tools. If a file exists (can be opened), looks for the + * given profile name and reads the settings into the relevant buffer. + * + * Any setting duplicated in both files will be set to that from + * `credentials`. + * + * Settings are stored in the supplied buffers as null-terminated strings. 
+ * + * Return: + * + * + SUCCESS: `SUCCEED` (0) + * + no error occurred and all settings were populated + * + FAILURE: `FAIL` (-1) + * + internal error occurred + * + unable to locate profile + * + region, key id, and secret key were not all found and set + * + * Programmer: Jacob Smith + * 2018-02-27 + * + * Changes: None + * + *---------------------------------------------------------------------------- + */ +herr_t +H5FD_s3comms_load_aws_profile(const char *profile_name, + char *key_id_out, + char *secret_access_key_out, + char *aws_region_out) +{ + herr_t ret_value = SUCCEED; + FILE *credfile = NULL; + char awspath[117]; + char filepath[128]; + + FUNC_ENTER_NOAPI_NOINIT + +#if S3COMMS_DEBUG + HDfprintf(stdout, "called H5FD_s3comms_load_aws_profile.\n"); +#endif + + /* TODO: Windows and other path gotchas */ + if (117 < HDsnprintf(awspath, 117, "%s/.aws/", getenv("HOME"))) + HGOTO_ERROR(H5E_ARGS, H5E_CANTCOPY, FAIL, + "unable to format home-aws path") + if (128 < HDsnprintf(filepath, 128, "%s%s", awspath, "credentials")) + HGOTO_ERROR(H5E_ARGS, H5E_CANTCOPY, FAIL, + "unable to format credentials path") + + credfile = fopen(filepath, "r"); + if (credfile != NULL) { + if (FAIL == H5FD__s3comms_load_aws_creds_from_file( + credfile, + profile_name, + key_id_out, + secret_access_key_out, + aws_region_out)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "unable to load from aws credentials") + if (EOF == fclose(credfile)) + HGOTO_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, + "unable to close credentials file") + credfile = NULL; + } + + if (128 < HDsnprintf(filepath, 128, "%s%s", awspath, "config")) + HGOTO_ERROR(H5E_ARGS, H5E_CANTCOPY, FAIL, + "unable to format config path") + credfile = fopen(filepath, "r"); + if (credfile != NULL) { + if (FAIL == H5FD__s3comms_load_aws_creds_from_file( + credfile, + profile_name, + (*key_id_out == 0) ? key_id_out : NULL, + (*secret_access_key_out == 0) ? secret_access_key_out : NULL, + (*aws_region_out == 0) ? 
aws_region_out : NULL)) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "unable to load from aws config") + if (EOF == fclose(credfile)) + HGOTO_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, + "unable to close config file") + credfile = NULL; + } + + /* fail if not all three settings were loaded */ + if (*key_id_out == 0 || + *secret_access_key_out == 0 || + *aws_region_out == 0) + { + ret_value = FAIL; + } + +done: + if (credfile != NULL) { + if (EOF == fclose(credfile)) + HDONE_ERROR(H5E_ARGS, H5E_ARGS, FAIL, + "problem error-closing aws configuration file") + } + + FUNC_LEAVE_NOAPI(ret_value); + +} /* H5FD_s3comms_load_aws_profile */ + + +/*---------------------------------------------------------------------------- + * + * Function: H5FD_s3comms_nlowercase() + * + * Purpose: + * + * From string starting at `s`, write `len` characters to `dest`, + * converting all to lowercase. + * + * Behavior is undefined if `s` is NULL or `len` overruns the allocated + * space of either `s` or `dest`. + * + * Provided as convenience. + * + * Return: + * + * - SUCCESS: `SUCCEED` + * - upon completion, `dest` is populated + * - FAILURE: `FAIL` + * - `dest == NULL` + * + * Programmer: Jacob Smith + * 2017-09-18 + * + * Changes: None. 
+ * + *---------------------------------------------------------------------------- + */ +herr_t +H5FD_s3comms_nlowercase(char *dest, + const char *s, + size_t len) +{ + herr_t ret_value = SUCCEED; + + FUNC_ENTER_NOAPI_NOINIT + +#if S3COMMS_DEBUG + HDfprintf(stdout, "called H5FD_s3comms_nlowercase.\n"); +#endif + + if (dest == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "destination cannot be null.\n"); + } + + if (len > 0) { + HDmemcpy(dest, s, len); + do { + len--; + dest[len] = (char)tolower( (int)dest[len] ); + } while (len > 0); + } + +done: + FUNC_LEAVE_NOAPI(ret_value); + +} /* H5FD_s3comms_nlowercase */ + + +/*---------------------------------------------------------------------------- + * + * Function: H5FD_s3comms_parse_url() + * + * Purpose: + * + * Parse URL-like string and stuff URL components into + * `parsed_url` structure, if possible. + * + * Expects null-terminated string of format: + * SCHEME "://" HOST [":" PORT ] ["/" [ PATH ] ] ["?" QUERY] + * where SCHEME :: "[a-zA-Z/.-]+" + * PORT :: "[0-9]" + * + * Stores resulting structure in argument pointer `purl`, if successful, + * creating and populating new `parsed_url_t` structure pointer. + * Empty or absent elements are NULL in new purl structure. + * + * Return: + * + * - SUCCESS: `SUCCEED` + * - `purl` pointer is populated + * - FAILURE: `FAIL` + * - unable to parse + * - `purl` is unaltered (probably NULL) + * + * Programmer: Jacob Smith + * 2017-10-30 + * + * Changes: None. 
+ * + *---------------------------------------------------------------------------- + */ +herr_t +H5FD_s3comms_parse_url(const char *str, + parsed_url_t **_purl) +{ + parsed_url_t *purl = NULL; /* pointer to new structure */ + const char *tmpstr = NULL; /* working pointer in string */ + const char *curstr = str; /* "start" pointer in string */ + long int len = 0; /* substring length */ + long int urllen = 0; /* length of passed-in url string */ + unsigned int i = 0; + herr_t ret_value = FAIL; + + + + FUNC_ENTER_NOAPI_NOINIT; + +#if S3COMMS_DEBUG + HDprintf("called H5FD_s3comms_parse_url.\n"); +#endif + + if (str == NULL || *str == '\0') { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "invalid url string"); + } + + urllen = (long int)HDstrlen(str); + + purl = (parsed_url_t *)H5MM_malloc(sizeof(parsed_url_t)); + if (purl == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL, + "can't allocate space for parsed_url_t"); + } + purl->magic = S3COMMS_PARSED_URL_MAGIC; + purl->scheme = NULL; + purl->host = NULL; + purl->port = NULL; + purl->path = NULL; + purl->query = NULL; + + /*************** + * READ SCHEME * + ***************/ + + tmpstr = strchr(curstr, ':'); + if (tmpstr == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "invalid SCHEME construction: probably not URL"); + } + len = tmpstr - curstr; + HDassert( (0 <= len) && (len < urllen) ); + + /* check for restrictions + */ + for (i = 0; i < len; i++) { + /* scheme = [a-zA-Z+-.]+ (terminated by ":") */ + if (!isalpha(curstr[i]) && + '+' != curstr[i] && + '-' != curstr[i] && + '.' 
!= curstr[i]) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "invalid SCHEME construction"); + } + } + /* copy lowercased scheme to structure + */ + purl->scheme = (char *)H5MM_malloc(sizeof(char) * (size_t)(len + 1)); + if (purl->scheme == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL, + "can't allocate space for SCHEME"); + } + (void)HDstrncpy(purl->scheme, curstr, (size_t)len); + purl->scheme[len] = '\0'; + for ( i = 0; i < len; i++ ) { + purl->scheme[i] = (char)tolower(purl->scheme[i]); + } + + /* Skip "://" */ + tmpstr += 3; + curstr = tmpstr; + + /************* + * READ HOST * + *************/ + + if (*curstr == '[') { + /* IPv6 */ + while (']' != *tmpstr) { + if (tmpstr == 0) { /* end of string reached! */ + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "reached end of URL: incomplete IPv6 HOST"); + } + tmpstr++; + } + tmpstr++; + } else { + while (0 != *tmpstr) { + if (':' == *tmpstr || + '/' == *tmpstr || + '?' == *tmpstr) + { + break; + } + tmpstr++; + } + } /* if IPv4 or IPv6 */ + len = tmpstr - curstr; + if (len == 0) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "HOST substring cannot be empty"); + } else if (len > urllen) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "problem with length of HOST substring"); + } + + /* copy host + */ + purl->host = (char *)H5MM_malloc(sizeof(char) * (size_t)(len + 1)); + if (purl->host == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL, + "can't allocate space for HOST"); + } + (void)HDstrncpy(purl->host, curstr, (size_t)len); + purl->host[len] = 0; + + /************* + * READ PORT * + *************/ + + if (':' == *tmpstr) { + tmpstr += 1; /* advance past ':' */ + curstr = tmpstr; + while ((0 != *tmpstr) && ('/' != *tmpstr) && ('?' 
!= *tmpstr)) { + tmpstr++; + } + len = tmpstr - curstr; + if (len == 0) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "PORT element cannot be empty"); + } else if (len > urllen) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "problem with length of PORT substring"); + } + for (i = 0; i < len; i ++) { + if (!isdigit(curstr[i])) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "PORT is not a decimal string"); + } + } + + /* copy port + */ + purl->port = (char *)H5MM_malloc(sizeof(char) * (size_t)(len + 1)); + if (purl->port == NULL) { /* cannot malloc */ + HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL, + "can't allocate space for PORT"); + } + (void)HDstrncpy(purl->port, curstr, (size_t)len); + purl->port[len] = 0; + } /* if PORT element */ + + /************* + * READ PATH * + *************/ + + if ('/' == *tmpstr) { + /* advance past '/' */ + tmpstr += 1; + curstr = tmpstr; + + /* seek end of PATH + */ + while ((0 != *tmpstr) && ('?' != *tmpstr)) { + tmpstr++; + } + len = tmpstr - curstr; + if (len > urllen) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "problem with length of PATH substring"); + } + if (len > 0) { + purl->path = (char *)H5MM_malloc(sizeof(char) * (size_t)(len + 1)); + if (purl->path == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL, + "can't allocate space for PATH"); + } /* cannot malloc path pointer */ + (void)HDstrncpy(purl->path, curstr, (size_t)len); + purl->path[len] = 0; + } + } /* if PATH element */ + + /************** + * READ QUERY * + **************/ + + if ('?' 
== *tmpstr) { + tmpstr += 1; + curstr = tmpstr; + while (0 != *tmpstr) { + tmpstr++; + } + len = tmpstr - curstr; + if (len == 0) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "QUERY cannot be empty"); + } else if (len > urllen) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "problem with length of QUERY substring"); + } + purl->query = (char *)H5MM_malloc(sizeof(char) * (size_t)(len + 1)); + if (purl->query == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL, + "can't allocate space for QUERY"); + } /* cannot malloc path pointer */ + (void)HDstrncpy(purl->query, curstr, (size_t)len); + purl->query[len] = 0; + } /* if QUERY exists */ + + + + *_purl = purl; + ret_value = SUCCEED; + +done: + if (ret_value == FAIL) { + H5FD_s3comms_free_purl(purl); + } + FUNC_LEAVE_NOAPI(ret_value); + +} /* H5FD_s3comms_parse_url */ + + +/*---------------------------------------------------------------------------- + * + * Function: H5FD_s3comms_percent_encode_char() + * + * Purpose: + * + * "Percent-encode" utf-8 character `c`, e.g., + * '$' -> "%24" + * '¢' -> "%C2%A2" + * + * `c` cannot be null. + * + * Does not (currently) accept multi-byte characters... + * limit to (?) u+00ff, well below upper bound for two-byte utf-8 encoding + * (u+0080..u+07ff). + * + * Writes output to `repr`. + * `repr` cannot be null. + * Assumes adequate space i `repr`... + * >>> char[4] or [7] for most characters, + * >>> [13] as theoretical maximum. + * + * Representation `repr` is null-terminated. + * + * Stores length of representation (without null terminator) at pointer + * `repr_len`. + * + * Return : SUCCEED/FAIL + * + * - SUCCESS: `SUCCEED` + * - percent-encoded representation written to `repr` + * - 'repr' is null-terminated + * - FAILURE: `FAIL` + * - `c` or `repr` was NULL + * + * Programmer: Jacob Smith + * + * Changes: + * + * - Integrate into HDF. + * - Rename from `hexutf8` to `H5FD_s3comms_percent_encode_char`. 
 *
 *----------------------------------------------------------------------------
 */
herr_t
H5FD_s3comms_percent_encode_char(char *repr,
                                 const unsigned char c,
                                 size_t *repr_len)
{
    unsigned int acc = 0;
    unsigned int i = 0;
    unsigned int k = 0;
    unsigned int stack[4] = {0, 0, 0, 0};
    unsigned int stack_size = 0;
    int chars_written = 0;
    herr_t ret_value = SUCCEED;
#if S3COMMS_DEBUG
    unsigned char s[2] = {c, 0};
    unsigned char hex[3] = {0, 0, 0};
#endif

    FUNC_ENTER_NOAPI_NOINIT

#if S3COMMS_DEBUG
    HDfprintf(stdout, "called H5FD_s3comms_percent_encode_char.\n");
#endif

    if (repr == NULL) {
        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "no destination `repr`.\n")
    }

#if S3COMMS_DEBUG
    H5FD_s3comms_bytes_to_hex((char *)hex, s, 1, FALSE);
    HDfprintf(stdout, "    CHAR: \'%s\'\n", s);
    HDfprintf(stdout, "    CHAR-HEX: \"%s\"\n", hex);
#endif

    if (c <= (unsigned char)0x7f) {
        /* ASCII range: character maps to a single UTF-8 byte and thus a
         * single percent-code, e.g. '$' -> "%24"
         */
#if S3COMMS_DEBUG
        HDfprintf(stdout, "    SINGLE-BYTE\n");
#endif
        *repr_len = 3;
        chars_written = HDsnprintf(repr, 4, "%%%02X", c);
        if (chars_written != 3)
            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
                        "cannot write char %c",
                        c);
    }
    else {
        /* 0x80..0xFF: character needs a multi-byte UTF-8 encoding and one
         * percent-code per byte, e.g. cent sign -> "%C2%A2"
         */
#if S3COMMS_DEBUG
        HDfprintf(stdout, "    MULTI-BYTE\n");
#endif
        stack_size = 0;
        k = (unsigned int)c;
        *repr_len = 0;
        /* decompose the code point into six-bit slices, least-significant
         * first, pushed onto `stack`
         */
        do {
            acc = k;
            acc >>= 6; /* cull least */
            acc <<= 6; /* six bits  */
            stack[stack_size++] = k - acc; /* max six-bit number */
            k = acc >> 6;
        } while (k > 0);

        /* now have "stack" of two to four six-bit numbers
         * to be put into UTF-8 byte fields
         */

#if S3COMMS_DEBUG
        HDfprintf(stdout, "    STACK:\n    {\n");
        for (i = 0; i < stack_size; i++) {
            H5FD_s3comms_bytes_to_hex((char *)hex,
                                      (unsigned char *)(&stack[i]),
                                      1,
                                      FALSE);
            hex[2] = 0;
            HDfprintf(stdout, "      %s,\n", hex);
        }
        HDfprintf(stdout, "    }\n");
#endif

        /****************
         * leading byte *
         ****************/

        /* prepend 11[1[1]]0 to first byte */
        /* 110xxxxx, 1110xxxx, or 11110xxx -- the count of leading 1-bits
         * encodes the total byte count of the UTF-8 sequence
         */
        acc = 0xC0; /* 2^7 + 2^6 -> 11000000 */
        acc += (stack_size > 2) ? 0x20 : 0;
        acc += (stack_size > 3) ? 0x10 : 0;
        stack_size -= 1;
        chars_written = HDsnprintf(repr, 4, "%%%02X", acc + stack[stack_size]);
        if (chars_written != 3)
            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
                        "cannot write char %c",
                        c);
        *repr_len += 3;

        /************************
         * continuation byte(s) *
         ************************/

        /* 10xxxxxx -- remaining six-bit slices, most-significant first
         * (stack is popped in reverse), each offset by 0x80 (128)
         */
        for (i = 0; i < stack_size; i++) {
            chars_written = HDsnprintf(&repr[i * 3 + 3],
                                       4,
                                       "%%%02X",
                                       128 + stack[stack_size - 1 - i]);
            if (chars_written != 3)
                HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
                            "cannot write char %c",
                            c);
            *repr_len += 3;
        }
    }
    /* null-terminate the representation */
    *(repr + *repr_len) = '\0';

done:
    FUNC_LEAVE_NOAPI(ret_value);

} /* H5FD_s3comms_percent_encode_char */


/*----------------------------------------------------------------------------
 *
 * Function: H5FD_s3comms_signing_key()
 *
 * Purpose: create AWS4 "Signing Key" from secret, region, and timestamp.
 *
 * Notes:
+ * + * Return: + * + * - SUCCESS: `SUCCEED` + * - raw byte data of signing key written to `md` + * - FAILURE: `FAIL` + * - if any input arguments was NULL + * + * Programmer: Jacob Smith + * 2017-07-13 + * + * Changes: + * + * - Integrate into HDF5. + * - Return herr_t type. + * --- Jacob Smith 2017-09-18 + * + * - NULL check and fail of input parameters. + * --- Jacob Smith 2017-10-10 + * + *---------------------------------------------------------------------------- + */ +herr_t +H5FD_s3comms_signing_key(unsigned char *md, + const char *secret, + const char *region, + const char *iso8601now) +{ +#ifdef H5_HAVE_ROS3_VFD + char *AWS4_secret = NULL; + size_t AWS4_secret_len = 0; + unsigned char datekey[SHA256_DIGEST_LENGTH]; + unsigned char dateregionkey[SHA256_DIGEST_LENGTH]; + unsigned char dateregionservicekey[SHA256_DIGEST_LENGTH]; + int ret = 0; /* return value of HDsnprintf */ + herr_t ret_value = SUCCEED; +#else + herr_t ret_value = SUCCEED; +#endif /* H5_HAVE_ROS3_VFD */ + + + + FUNC_ENTER_NOAPI_NOINIT + +#ifdef H5_HAVE_ROS3_VFD + +#if S3COMMS_DEBUG + HDfprintf(stdout, "called H5FD_s3comms_signing_key.\n"); +#endif + + if (md == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "Destination `md` cannot be NULL.\n") + } + if (secret == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "`secret` cannot be NULL.\n") + } + if (region == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "`region` cannot be NULL.\n") + } + if (iso8601now == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "`iso8601now` cannot be NULL.\n") + } + + AWS4_secret_len = 4 + HDstrlen(secret) + 1; + AWS4_secret = (char*)H5MM_malloc(sizeof(char *) * AWS4_secret_len); + if (AWS4_secret == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "Could not allocate space.\n") + } + + /* prepend "AWS4" to start of the secret key + */ + ret = HDsnprintf(AWS4_secret, AWS4_secret_len,"%s%s", "AWS4", secret); + if ((size_t)ret != (AWS4_secret_len - 1)) + HGOTO_ERROR(H5E_ARGS, 
H5E_BADVALUE, FAIL, + "problem writing AWS4+secret `%s`", + secret); + + /* hash_func, key, len(key), msg, len(msg), digest_dest, digest_len_dest + * we know digest length, so ignore via NULL + */ + HMAC(EVP_sha256(), + (const unsigned char *)AWS4_secret, + (int)HDstrlen(AWS4_secret), + (const unsigned char*)iso8601now, + 8, /* 8 --> length of 8 --> "yyyyMMDD" */ + datekey, + NULL); + HMAC(EVP_sha256(), + (const unsigned char *)datekey, + SHA256_DIGEST_LENGTH, + (const unsigned char *)region, + HDstrlen(region), + dateregionkey, + NULL); + HMAC(EVP_sha256(), + (const unsigned char *)dateregionkey, + SHA256_DIGEST_LENGTH, + (const unsigned char *)"s3", + 2, + dateregionservicekey, + NULL); + HMAC(EVP_sha256(), + (const unsigned char *)dateregionservicekey, + SHA256_DIGEST_LENGTH, + (const unsigned char *)"aws4_request", + 12, + md, + NULL); + +done: + H5MM_xfree(AWS4_secret); + +#endif /* H5_HAVE_ROS3_VFD */ + + FUNC_LEAVE_NOAPI(ret_value); + +} /* H5FD_s3comms_signing_key */ + + +/*---------------------------------------------------------------------------- + * + * Function: H5FD_s3comms_tostringtosign() + * + * Purpose: + * + * Get AWS "String to Sign" from Canonical Request, timestamp, + * and AWS "region". + * + * Common between single request and "chunked upload", + * conforms to: + * "AWS4-HMAC-SHA256\n" + + * <ISO8601 date format> + "\n" + // yyyyMMDD'T'hhmmss'Z' + * <yyyyMMDD> + "/" + <AWS Region> + "/s3/aws4-request\n" + + * hex(SHA256(<CANONICAL-REQUEST>)) + * + * Inputs `creq` (canonical request string), `now` (ISO8601 format), + * and `region` (s3 region designator string) must all be + * null-terminated strings. + * + * Result is written to `dest` with null-terminator. + * It is left to programmer to ensure `dest` has adequate space. 
+ * + * Return: + * + * - SUCCESS: `SUCCEED` + * - "string to sign" written to `dest` and null-terminated + * - FAILURE: `FAIL` + * - if any of the inputs are NULL + * - if an error is encountered while computing checksum + * + * Programmer: Jacob Smith + * 2017-07-?? + * + * Changes: + * + * - Integrate with HDF5. + * - Rename from `tostringtosign` to `H5FD_s3comms_tostringtosign`. + * - Return `herr_t` instead of characters written. + * - Use HDF-friendly bytes-to-hex function (`H5FD_s3comms_bytes_to_hex`) + * instead of general-purpose, deprecated `hex()`. + * - Adjust casts to openssl's `SHA256`. + * - Input strings are now `const`. + * --- Jacob Smith 2017-09-19 + * + *---------------------------------------------------------------------------- + */ +herr_t +H5FD_s3comms_tostringtosign(char *dest, + const char *req, + const char *now, + const char *region) +{ +#ifdef H5_HAVE_ROS3_VFD + unsigned char checksum[SHA256_DIGEST_LENGTH * 2 + 1]; + size_t d = 0; + char day[9]; + char hexsum[SHA256_DIGEST_LENGTH * 2 + 1]; + size_t i = 0; + int ret = 0; /* HDsnprintf return value */ + herr_t ret_value = SUCCEED; + char tmp[128]; +#else + herr_t ret_value = FAIL; +#endif /* H5_HAVE_ROS3_VFD */ + + + FUNC_ENTER_NOAPI_NOINIT + +#ifdef H5_HAVE_ROS3_VFD + +#if S3COMMS_DEBUG + HDfprintf(stdout, "called H5FD_s3comms_tostringtosign.\n"); +#endif + + if (dest == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "destination buffer cannot be null.\n") + } + if (req == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "canonical request cannot be null.\n") + } + if (now == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "Timestring cannot be NULL.\n") + } + if (region == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "Region cannot be NULL.\n") + } + + + + for (i = 0; i < 128; i++) { + tmp[i] = '\0'; + } + for (i = 0; i < SHA256_DIGEST_LENGTH * 2 + 1; i++) { + checksum[i] = '\0'; + hexsum[i] = '\0'; + } + HDstrncpy(day, now, 8); + day[8] = '\0'; + ret = 
HDsnprintf(tmp, 127, "%s/%s/s3/aws4_request", day, region); + if (ret <= 0 || ret >= 127) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "problem adding day and region to string") + + + + HDmemcpy((dest + d), "AWS4-HMAC-SHA256\n", 17); + d = 17; + + HDmemcpy((dest+d), now, HDstrlen(now)); + d += HDstrlen(now); + dest[d++] = '\n'; + + HDmemcpy((dest + d), tmp, HDstrlen(tmp)); + d += HDstrlen(tmp); + dest[d++] = '\n'; + + SHA256((const unsigned char *)req, + HDstrlen(req), + checksum); + + if (FAIL == + H5FD_s3comms_bytes_to_hex(hexsum, + (const unsigned char *)checksum, + SHA256_DIGEST_LENGTH, + true)) + { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "could not create hex string"); + } + + for (i = 0; i < SHA256_DIGEST_LENGTH * 2; i++) { + dest[d++] = hexsum[i]; + } + + dest[d] = '\0'; + +#endif /* H5_HAVE_ROS3_VFD */ + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5ros3_tostringtosign */ + + +/*---------------------------------------------------------------------------- + * + * Function: H5FD_s3comms_trim() + * + * Purpose: + * + * Remove all whitespace characters from start and end of a string `s` + * of length `s_len`, writing trimmed string copy to `dest`. + * Stores number of characters remaining at `n_written`. + * + * Destination for trimmed copy `dest` cannot be null. + * `dest` must have adequate space allocated for trimmed copy. + * If inadequate space, behavior is undefined, possibly resulting + * in segfault or overwrite of other data. + * + * If `s` is NULL or all whitespace, `dest` is untouched and `n_written` + * is set to 0. + * + * Return: + * + * - SUCCESS: `SUCCEED` + * - FAILURE: `FAIL` + * - `dest == NULL` + * + * Programmer: Jacob Smith + * 2017-09-18 + * + * Changes: + * + * - Rename from `trim()` to `H5FD_s3comms_trim()`. + * - Incorporate into HDF5. + * - Returns `herr_t` type. + * --- Jacob Smith 2017-??-?? 
+ * + *---------------------------------------------------------------------------- + */ +herr_t +H5FD_s3comms_trim(char *dest, + char *s, + size_t s_len, + size_t *n_written) +{ + herr_t ret_value = SUCCEED; + + + + FUNC_ENTER_NOAPI_NOINIT + +#if S3COMMS_DEBUG + HDfprintf(stdout, "called H5FD_s3comms_trim.\n"); +#endif + + if (dest == NULL) { + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "destination cannot be null.") + } + if (s == NULL) { + s_len = 0; + } + + + + if (s_len > 0) { + /* Find first non-whitespace character from start; + * reduce total length per character. + */ + while ((s_len > 0) && + isspace((unsigned char)s[0]) && s_len > 0) + { + s++; + s_len--; + } + + /* Find first non-whitespace character from tail; + * reduce length per-character. + * If length is 0 already, there is no non-whitespace character. + */ + if (s_len > 0) { + do { + s_len--; + } while( isspace((unsigned char)s[s_len]) ); + s_len++; + + /* write output into dest + */ + HDmemcpy(dest, s, s_len); + } + } + + *n_written = s_len; + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_s3comms_trim */ + + +/*---------------------------------------------------------------------------- + * + * Function: H5FD_s3comms_uriencode() + * + * Purpose: + * + * URIencode (percent-encode) every byte except "[a-zA-Z0-9]-._~". + * + * For each character in source string `_s` from `s[0]` to `s[s_len-1]`, + * writes to `dest` either the raw character or its percent-encoded + * equivalent. + * + * See `H5FD_s3comms_bytes_to_hex` for information on percent-encoding. + * + * Space (' ') character encoded as "%20" (not "+") + * + * Forward-slash ('/') encoded as "%2F" only when `encode_slash == true`. + * + * Records number of characters written at `n_written`. + * + * Assumes that `dest` has been allocated with enough space. + * + * Neither `dest` nor `s` can be NULL. + * + * `s_len == 0` will have no effect. 
+ * + * Return: + * + * - SUCCESS: `SUCCEED` + * - FAILURE: `FAIL` + * - source strings `s` or destination `dest` are NULL + * - error while attempting to percent-encode a character + * + * Programmer: Jacob Smith + * 2017-07-?? + * + * Changes: + * + * - Integrate to HDF environment. + * - Rename from `uriencode` to `H5FD_s3comms_uriencode`. + * - Change return from characters written to herr_t; + * move to i/o parameter `n_written`. + * - No longer append null-terminator to string; + * programmer may append or not as appropriate upon return. + * --- Jacob Smith 2017-09-15 + * + *---------------------------------------------------------------------------- + */ +herr_t +H5FD_s3comms_uriencode(char *dest, + const char *s, + size_t s_len, + hbool_t encode_slash, + size_t *n_written) +{ + char c = 0; + size_t dest_off = 0; + char hex_buffer[13]; + size_t hex_off = 0; + size_t hex_len = 0; + herr_t ret_value = SUCCEED; + size_t s_off = 0; + + + + FUNC_ENTER_NOAPI_NOINIT + +#if S3COMMS_DEBUG + HDfprintf(stdout, "H5FD_s3comms_uriencode called.\n"); +#endif + + if (s == NULL) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "source string cannot be NULL"); + if (dest == NULL) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "destination cannot be NULL"); + + /* Write characters to destination, converting to percent-encoded + * "hex-utf-8" strings if necessary. + * e.g., '$' -> "%24" + */ + for (s_off = 0; s_off < s_len; s_off++) { + c = s[s_off]; + if (isalnum(c) || + c == '.' 
|| + c == '-' || + c == '_' || + c == '~' || + (c == '/' && encode_slash == FALSE)) + { + dest[dest_off++] = c; + } else { + hex_off = 0; + if (FAIL == + H5FD_s3comms_percent_encode_char(hex_buffer, + (const unsigned char)c, + &hex_len)) + { + hex_buffer[0] = c; + hex_buffer[1] = 0; + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "unable to percent-encode character \'%s\' " + "at %d in \"%s\"", hex_buffer, (int)s_off, s); + } + + for (hex_off = 0; hex_off < hex_len; hex_off++) { + dest[dest_off++] = hex_buffer[hex_off]; + } + } + } + + if (dest_off < s_len) + HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, + "buffer overflow"); + + *n_written = dest_off; + +done: + FUNC_LEAVE_NOAPI(ret_value) + +} /* H5FD_s3comms_uriencode */ + + diff --git a/src/H5FDs3comms.h b/src/H5FDs3comms.h new file mode 100644 index 0000000..0524c46 --- /dev/null +++ b/src/H5FDs3comms.h @@ -0,0 +1,634 @@ +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * + * Read-Only S3 Virtual File Driver (VFD) * + * Copyright (c) 2017-2018, The HDF Group. * + * * + * All rights reserved. * + * * + * NOTICE: * + * All information contained herein is, and remains, the property of The HDF * + * Group. The intellectual and technical concepts contained herein are * + * proprietary to The HDF Group. Dissemination of this information or * + * reproduction of this material is strictly forbidden unless prior written * + * permission is obtained from The HDF Group. * + * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/***************************************************************************** + * + * This is the header for the S3 Communications module + * + * ***NOT A FILE DRIVER*** + * + * Purpose: + * + * - Provide structures and functions related to communicating with + * Amazon S3 (Simple Storage Service). + * - Abstract away the REST API (HTTP, + * networked communications) behind a series of uniform function calls. 
 *     - Handle AWS4 authentication, if appropriate.
 *     - Fail predictably in event of errors.
 *     - Eventually, support more S3 operations, such as creating, writing to,
 *       and removing Objects remotely.
 *
 *     translates:
 *     `read(some_file, bytes_offset, bytes_length, &dest_buffer);`
 *     to:
 *     ```
 *     GET myfile HTTP/1.1
 *     Host: somewhere.me
 *     Range: bytes=4096-5115
 *     ```
 *     and places received bytes from HTTP response...
 *     ```
 *     HTTP/1.1 206 Partial-Content
 *     Content-Range: 4096-5115/63239
 *
 *     <bytes>
 *     ```
 *     ...in destination buffer.
 *
 * TODO: put documentation in a consistent place and point to it from here.
 *
 * Programmer: Jacob Smith
 *             2017-11-30
 *
 *****************************************************************************/

#include <ctype.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#ifdef H5_HAVE_ROS3_VFD
#include <curl/curl.h>
#include <openssl/evp.h>
#include <openssl/hmac.h>
#include <openssl/sha.h>
#endif /* ifdef H5_HAVE_ROS3_VFD */

/*****************
 * PUBLIC MACROS *
 *****************/

/* hexadecimal string of pre-computed sha256 checksum of the empty string
 * hex(sha256sum(""))
 */
#define EMPTY_SHA256 \
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"

/* string length (plus null terminator)
 * example ISO8601-format string: "20170713T145903Z" (YYYYmmdd'T'HHMMSS'Z')
 */
#define ISO8601_SIZE 17

/* string length (plus null terminator)
 * example RFC7231-format string: "Fri, 30 Jun 2017 20:41:55 GMT"
 */
#define RFC7231_SIZE 30

/*---------------------------------------------------------------------------
 *
 * Macro: ISO8601NOW()
 *
 * Purpose:
 *
 *     write "YYYYmmdd'T'HHMMSS'Z'" (less single-quotes) to dest
 *     e.g., "20170630T204155Z"
 *
 *     wrapper for strftime()
 *
 *     It is left to the programmer to check return value of
 *     ISO8601NOW (should equal ISO8601_SIZE - 1).
 *
 * Programmer: Jacob Smith
 *             2017-07-??
 *
 *---------------------------------------------------------------------------
 */
#define ISO8601NOW(dest, now_gm) \
strftime((dest), ISO8601_SIZE, "%Y%m%dT%H%M%SZ", (now_gm))

/*---------------------------------------------------------------------------
 *
 * Macro: RFC7231NOW()
 *
 * Purpose:
 *
 *     write "Day, dd Mmm YYYY HH:MM:SS GMT" to dest
 *     e.g., "Fri, 30 Jun 2017 20:41:55 GMT"
 *
 *     wrapper for strftime()
 *
 *     It is left to the programmer to check return value of
 *     RFC7231NOW (should equal RFC7231_SIZE - 1).
 *
 * Programmer: Jacob Smith
 *             2017-07-??
 *
 *---------------------------------------------------------------------------
 */
#define RFC7231NOW(dest, now_gm) \
strftime((dest), RFC7231_SIZE, "%a, %d %b %Y %H:%M:%S GMT", (now_gm))


/* Reasonable maximum length of a credential string.
 * Provided for error-checking S3COMMS_FORMAT_CREDENTIAL (below).
 *  17 <- "////aws4_request\0"
 *   2 <- "s3" (service)
 *   8 <- "YYYYmmdd" (date)
 * 128 <- (access_id)
 * 155 :: sum
 */
#define S3COMMS_MAX_CREDENTIAL_SIZE 155


/*---------------------------------------------------------------------------
 *
 * Macro: H5FD_S3COMMS_FORMAT_CREDENTIAL()
 *
 * Purpose:
 *
 *     Format "S3 Credential" string from inputs, for AWS4.
 *
 *     Wrapper for HDsnprintf().
 *
 *     _HAS NO ERROR-CHECKING FACILITIES_
 *     It is left to programmer to ensure that return value confers success.
 *     e.g.,
 *     ```
 *     assert( S3COMMS_MAX_CREDENTIAL_SIZE >=
 *             S3COMMS_FORMAT_CREDENTIAL(...) );
 *     ```
 *
 *     "<access-id>/<date>/<aws-region>/<aws-service>/aws4_request"
 *     assuming that `dest` has adequate space.
 *
 *     ALL inputs must be null-terminated strings.
 *
 *     `access` should be the user's access key ID.
 *     `date` must be of format "YYYYmmdd".
 *     `region` should be relevant AWS region, i.e. "us-east-1".
 *     `service` should be "s3".
 *
 * Programmer: Jacob Smith
 *             2017-09-19
 *
 *---------------------------------------------------------------------------
 */
#define S3COMMS_FORMAT_CREDENTIAL(dest, access, iso8601_date, region, service) \
HDsnprintf((dest), S3COMMS_MAX_CREDENTIAL_SIZE, \
           "%s/%s/%s/%s/aws4_request", \
           (access), (iso8601_date), (region), (service))

/*********************
 * PUBLIC STRUCTURES *
 *********************/


/*----------------------------------------------------------------------------
 *
 * Structure: hrb_node_t
 *
 *     HTTP Header Field Node
 *
 *     Maintain an ordered (linked) list of HTTP Header fields.
 *
 *     Provides efficient access and manipulation of a logical sequence of
 *     HTTP header fields, of particular use when composing an
 *     "S3 Canonical Request" for authentication.
 *
 *     - The creation of a Canonical Request involves:
 *         - convert field names to lower case
 *         - sort by this lower-case name
 *         - convert ": " name-value separator in HTTP string to ":"
 *         - get sorted lowercase names without field or separator
 *
 *     As HTTP allows headers in any order (excepting the case of multiple
 *     headers with the same name), the list ordering can be optimized for
 *     Canonical Request creation, suggesting alphabetical order. For more
 *     expedient insertion and removal of elements in the list, a linked list
 *     seems preferable to a dynamically-expanding array. The usually-smaller
 *     number of entries (5 or fewer) makes performance overhead of traversing
 *     the list trivial.
 *
 *     The above requirements of creating a Canonical Request suggest a
 *     reasonable trade-off of speed for space, with the option to compute
 *     elements as needed or to have the various elements prepared and stored
 *     in the structure (e.g. name, value, lowername, concatenated
 *     name:value). The structure currently is implemented to pre-compute.
 *
 *     At all times, the "first" node of the list should be the least,
 *     alphabetically. For all nodes, the `next` node should be either NULL or
 *     of greater alphabetical value.
 *
 *     Each node contains its own header field information, plus a pointer to
 *     the next node.
 *
 *     It is not allowed to have multiple nodes with the same _lowercase_
 *     `name`s in the same list
 *     (i.e., name is case-insensitive for access and modification.)
 *
 *     All data (`name`, `value`, `lowername`, and `cat`) are null-terminated
 *     strings allocated specifically for their node.
 *
 * `magic` (unsigned long)
 *
 *     "unique" identifier number for the structure type
 *
 * `name` (char *)
 *
 *     Case-meaningful name of the HTTP field.
 *     Given case is how it is supplied to networking code.
 *     e.g., "Range"
 *
 * `lowername` (char *)
 *
 *     Lowercase copy of name.
 *     e.g., "range"
 *
 * `value` (char *)
 *
 *     Case-meaningful value of HTTP field.
 *     e.g., "bytes=0-9"
 *
 * `cat` (char *)
 *
 *     Concatenated, null-terminated string of HTTP header line,
 *     as the field would appear in an HTTP request.
 *     e.g., "Range: bytes=0-9"
 *
 * `next` (hrb_node_t *)
 *
 *     Pointer to next node in the list, or NULL sentinel as end of list.
 *     Next node must have a greater `lowername` as determined by strcmp().
 *
 * Programmer: Jacob Smith
 *             2017-09-22
 *
 *----------------------------------------------------------------------------
 */
typedef struct hrb_node_t {
    unsigned long      magic;
    char              *name;
    char              *value;
    char              *cat;
    char              *lowername;
    struct hrb_node_t *next;
} hrb_node_t;
#define S3COMMS_HRB_NODE_MAGIC 0x7F5757UL


/*----------------------------------------------------------------------------
 *
 * Structure: hrb_t
 *
 *     HTTP Request Buffer structure
 *
 *     Logically represent an HTTP request
 *
 *         GET /myplace/myfile.h5 HTTP/1.1
 *         Host: over.rainbow.oz
 *         Date: Fri, 01 Dec 2017 12:35:04 CST
 *
 *         <body>
 *
 *     ...with fast, efficient access to and modification of primary and
 *     field elements.
 *
 *     Structure for building HTTP requests while hiding much of the string
 *     processing required "under the hood."
 *
 *     Information about the request target -- the first line -- and the body
 *     text, if any, are managed directly with this structure. All header
 *     fields, e.g., "Host" and "Date" above, are created with a linked list
 *     of `hrb_node_t` and included in the request by a pointer to the head
 *     of the list.
 *
 * `magic` (unsigned long)
 *
 *     "Magic" number confirming that this is an hrb_t structure and
 *     what operations are valid for it.
 *
 *     Must be S3COMMS_HRB_MAGIC to be valid.
 *
 * `body` (char *) :
 *
 *     Pointer to start of HTTP body.
 *
 *     Can be NULL, in which case it is treated as the empty string, "".
 *
 * `body_len` (size_t) :
 *
 *     Number of bytes (characters) in `body`. 0 if empty or NULL `body`.
 *
 * `first_header` (hrb_node_t *) :
 *
 *     Pointer to first SORTED header node, if any.
 *     It is left to the programmer to ensure that this node and associated
 *     list is destroyed when done.
 *
 * `resource` (char *) :
 *
 *     Pointer to resource URL string, e.g., "/folder/page.xhtml".
 *
 * `verb` (char *) :
 *
 *     Pointer to HTTP verb string, e.g., "GET".
 *
 * `version` (char *) :
 *
 *     Pointer to HTTP version string, e.g., "HTTP/1.1".
 *
 * Programmer: Jacob Smith
 *
 *----------------------------------------------------------------------------
 */
typedef struct {
    unsigned long  magic;
    char          *body;
    size_t         body_len;
    hrb_node_t    *first_header;
    char          *resource;
    char          *verb;
    char          *version;
} hrb_t;
#define S3COMMS_HRB_MAGIC 0x6DCC84UL


/*----------------------------------------------------------------------------
 *
 * Structure: parsed_url_t
 *
 *     Represent a URL with easily-accessed pointers to logical elements
 *     within. These elements (components) are stored as null-terminated
 *     strings (or just NULLs). These components should be allocated for the
 *     structure, making the data as safe as possible from modification.
 *     If a component is NULL, it is either implicit in or absent from the
 *     URL.
 *
 *     "http://mybucket.s3.amazonaws.com:8080/somefile.h5?param=value&arg=value"
 *      ^--^   ^-----------------------^ ^--^ ^---------^ ^-------------------^
 *     Scheme             Host           Port  Resource        Query/-ies
 *
 * `magic` (unsigned long)
 *
 *     Structure identification and validation identifier.
 *     Identifies as `parsed_url_t` type.
 *
 * `scheme` (char *)
 *
 *     String representing which protocol is to be expected.
 *     _Must_ be present.
 *     "http", "https", "ftp", e.g.
 *
 * `host` (char *)
 *
 *     String of host, either domain name, IPv4, or IPv6 format.
 *     _Must_ be present.
 *     "over.rainbow.oz", "192.168.0.1", "[0000:0000:0000:0001]"
 *
 * `port` (char *)
 *
 *     String representation of specified port. Must resolve to a valid
 *     unsigned integer.
 *     "9000", "80"
 *
 * `path` (char *)
 *
 *     Path to resource on host. If not specified, assumes root "/".
 *     "lollipop_guild.wav", "characters/witches/white.dat"
 *
 * `query` (char *)
 *
 *     Single string of all query parameters in url (if any).
 *     "arg1=value1&arg2=value2"
 *
 * Programmer: Jacob Smith
 *
 *----------------------------------------------------------------------------
 */
typedef struct {
    unsigned long  magic;
    char          *scheme; /* required */
    char          *host;   /* required */
    char          *port;
    char          *path;
    char          *query;
} parsed_url_t;
#define S3COMMS_PARSED_URL_MAGIC 0x21D0DFUL


/*----------------------------------------------------------------------------
 *
 * Structure: s3r_t
 *
 *     S3 request structure "handle".
 *
 *     Holds persistent information for Amazon S3 requests.
 *
 *     Instantiated through `H5FD_s3comms_s3r_open()`, copies data into self.
 *
 *     Intended to be re-used for operations on a remote object.
 *
 *     Cleaned up through `H5FD_s3comms_s3r_close()`.
 *
 *     _DO NOT_ share handle between threads: curl easy handle `curlhandle`
 *     has undefined behavior if called to perform in multiple threads.
 *
 * `magic` (unsigned long)
 *
 *     "magic" number identifying this structure as unique type.
 *     MUST equal `S3R_MAGIC` to be valid.
 *
 * `curlhandle` (CURL)
 *
 *     Pointer to the curl_easy handle generated for the request.
 *
 * `filesize` (size_t)
 *
 *     NOTE(review): undocumented in the original -- presumably the size in
 *     bytes of the remote object, cached at open; confirm against
 *     H5FD_s3comms_s3r_get_filesize().
 *
 * `httpverb` (char *)
 *
 *     Pointer to NULL-terminated string. HTTP verb,
 *     e.g. "GET", "HEAD", "PUT", etc.
 *
 *     Default is NULL, resulting in a "GET" request.
 *
 * `purl` (parsed_url_t *)
 *
 *     Pointer to structure holding the elements of URL for file open.
 *
 *     e.g., "http://bucket.aws.com:8080/myfile.dat?q1=v1&q2=v2"
 *     parsed into...
 *     {   scheme: "http"
 *         host:   "bucket.aws.com"
 *         port:   "8080"
 *         path:   "myfile.dat"
 *         query:  "q1=v1&q2=v2"
 *     }
 *
 *     Cannot be NULL.
 *
 * `region` (char *)
 *
 *     Pointer to NULL-terminated string, specifying S3 "region",
 *     e.g., "us-east-1".
 *
 *     Required to authenticate.
 *
 * `secret_id` (char *)
 *
 *     Pointer to NULL-terminated string for "secret" access id to S3
 *     resource.
 *
 *     Required to authenticate.
 *
 * `signing_key` (unsigned char *)
 *
 *     Pointer to `SHA256_DIGEST_LENGTH`-long string for "re-usable" signing
 *     key, generated via
 *     `HMAC-SHA256(HMAC-SHA256(HMAC-SHA256(HMAC-SHA256("AWS4<secret_key>",
 *         "<yyyyMMDD"), "<aws-region>"), "<aws-service>"), "aws4_request")`
 *     which may be re-used for several (up to seven (7)) days from creation?
 *     Computed once upon file open.
 *
 *     Required to authenticate.
 *
 * Programmer: Jacob Smith
 *
 *----------------------------------------------------------------------------
 */
typedef struct {
    unsigned long magic;
#ifdef H5_HAVE_ROS3_VFD
    CURL          *curlhandle;
    size_t         filesize;
    char          *httpverb;
    parsed_url_t  *purl;
    char          *region;
    char          *secret_id;
    unsigned char *signing_key;
#endif /* ifdef H5_HAVE_ROS3_VFD */
} s3r_t;
/* NOTE(review): unlike the other magic constants above, this value has no
 * UL suffix -- confirm whether that is intentional. */
#define S3COMMS_S3R_MAGIC 0x44d8d79

/*******************************************
 * DECLARATION OF HTTP FIELD LIST ROUTINES *
 *******************************************/

herr_t H5FD_s3comms_hrb_node_set(hrb_node_t **L,
                                 const char *name,
                                 const char *value);

/***********************************************
 * DECLARATION OF HTTP REQUEST BUFFER ROUTINES *
 ***********************************************/

herr_t H5FD_s3comms_hrb_destroy(hrb_t **buf);

hrb_t * H5FD_s3comms_hrb_init_request(const char *verb,
                                      const char *resource,
                                      const char *host);

/*************************************
 * DECLARATION OF S3REQUEST ROUTINES *
 *************************************/

H5_DLL herr_t H5FD_s3comms_s3r_close(s3r_t *handle);

H5_DLL size_t H5FD_s3comms_s3r_get_filesize(s3r_t *handle);

H5_DLL s3r_t * H5FD_s3comms_s3r_open(const char url[],
                                     const char region[],
                                     const char id[],
                                     const unsigned char signing_key[]);

H5_DLL herr_t H5FD_s3comms_s3r_read(s3r_t *handle,
                                    haddr_t offset,
                                    size_t len,
                                    void *dest);

/*********************************
 * DECLARATION OF OTHER ROUTINES *
 *********************************/

H5_DLL struct
tm * gmnow(void); + +herr_t H5FD_s3comms_aws_canonical_request(char *canonical_request_dest, + char *signed_headers_dest, + hrb_t *http_request); + +H5_DLL herr_t H5FD_s3comms_bytes_to_hex(char *dest, + const unsigned char *msg, + size_t msg_len, + hbool_t lowercase); + +herr_t H5FD_s3comms_free_purl(parsed_url_t *purl); + +herr_t H5FD_s3comms_HMAC_SHA256(const unsigned char *key, + size_t key_len, + const char *msg, + size_t msg_len, + char *dest); + +herr_t H5FD_s3comms_load_aws_profile(const char *name, + char *key_id_out, + char *secret_access_key_out, + char *aws_region_out); + +herr_t H5FD_s3comms_nlowercase(char *dest, + const char *s, + size_t len); + +herr_t H5FD_s3comms_parse_url(const char *str, + parsed_url_t **purl); + +herr_t H5FD_s3comms_percent_encode_char(char *repr, + const unsigned char c, + size_t *repr_len); + +H5_DLL herr_t H5FD_s3comms_signing_key(unsigned char *md, + const char *secret, + const char *region, + const char *iso8601now); + +herr_t H5FD_s3comms_tostringtosign(char *dest, + const char *req_str, + const char *now, + const char *region); + +H5_DLL herr_t H5FD_s3comms_trim(char *dest, + char *s, + size_t s_len, + size_t *n_written); + +H5_DLL herr_t H5FD_s3comms_uriencode(char *dest, + const char *s, + size_t s_len, + hbool_t encode_slash, + size_t *n_written); + + diff --git a/src/Makefile.am b/src/Makefile.am index 0eaae1a..f737d5d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -63,8 +63,8 @@ libhdf5_la_SOURCES= H5.c H5checksum.c H5dbg.c H5system.c H5timer.c H5trace.c \ H5FA.c H5FAcache.c H5FAdbg.c H5FAdblock.c H5FAdblkpage.c H5FAhdr.c \ H5FAint.c H5FAstat.c H5FAtest.c \ H5FD.c H5FDcore.c \ - H5FDfamily.c H5FDint.c H5FDlog.c \ - H5FDmulti.c H5FDsec2.c H5FDspace.c H5FDstdio.c H5FDtest.c \ + H5FDfamily.c H5FDhdfs.c H5FDint.c H5FDlog.c H5FDs3comms.c \ + H5FDmulti.c H5FDros3.c H5FDsec2.c H5FDspace.c H5FDstdio.c H5FDtest.c \ H5FL.c H5FO.c H5FS.c H5FScache.c H5FSdbg.c H5FSint.c H5FSsection.c \ H5FSstat.c H5FStest.c \ H5G.c 
H5Gbtree2.c H5Gcache.c \ @@ -138,8 +138,8 @@ include_HEADERS = hdf5.h H5api_adpt.h H5overflow.h H5pubconf.h H5public.h H5vers H5Cpublic.h H5Dpublic.h \ H5Epubgen.h H5Epublic.h H5ESpublic.h H5Fpublic.h \ H5FDpublic.h H5FDcore.h H5FDdirect.h \ - H5FDfamily.h H5FDlog.h H5FDmpi.h H5FDmpio.h \ - H5FDmulti.h H5FDsec2.h H5FDstdio.h H5FDwindows.h \ + H5FDfamily.h H5FDhdfs.h H5FDlog.h H5FDmpi.h H5FDmpio.h \ + H5FDmulti.h H5FDros3.h H5FDsec2.h H5FDstdio.h H5FDwindows.h \ H5Gpublic.h H5Ipublic.h H5Lpublic.h \ H5MMpublic.h H5Opublic.h H5Ppublic.h \ H5PLextern.h H5PLpublic.h \ @@ -40,16 +40,18 @@ #include "H5Zpublic.h" /* Data filters */ /* Predefined file drivers */ -#include "H5FDcore.h" /* Files stored entirely in memory */ -#include "H5FDdirect.h" /* Linux direct I/O */ -#include "H5FDfamily.h" /* File families */ +#include "H5FDcore.h" /* Files stored entirely in memory */ +#include "H5FDdirect.h" /* Linux direct I/O */ +#include "H5FDfamily.h" /* File families */ +#include "H5FDhdfs.h" /* Hadoop HDFS */ #include "H5FDlog.h" /* sec2 driver with I/O logging (for debugging) */ -#include "H5FDmpi.h" /* MPI-based file drivers */ -#include "H5FDmulti.h" /* Usage-partitioned file family */ -#include "H5FDsec2.h" /* POSIX unbuffered file I/O */ -#include "H5FDstdio.h" /* Standard C buffered I/O */ +#include "H5FDmpi.h" /* MPI-based file drivers */ +#include "H5FDmulti.h" /* Usage-partitioned file family */ +#include "H5FDros3.h" /* R/O S3 "file" I/O */ +#include "H5FDsec2.h" /* POSIX unbuffered file I/O */ +#include "H5FDstdio.h" /* Standard C buffered I/O */ #ifdef H5_HAVE_WINDOWS -#include "H5FDwindows.h" /* Win32 I/O */ +#include "H5FDwindows.h" /* Win32 I/O */ #endif /* Virtual object layer (VOL) connectors */ diff --git a/src/libhdf5.settings.in b/src/libhdf5.settings.in index f856ebc..baa99ea 100644 --- a/src/libhdf5.settings.in +++ b/src/libhdf5.settings.in @@ -79,6 +79,8 @@ Parallel Filtered Dataset Writes: @PARALLEL_FILTERED_WRITES@ I/O filters (external): 
@EXTERNAL_FILTERS@ MPE: @MPE@ Direct VFD: @DIRECT_VFD@ + (Read-Only) S3 VFD: @ROS3_VFD@ + (Read-Only) HDFS VFD: @HAVE_LIBHDFS@ dmalloc: @HAVE_DMALLOC@ Packages w/ extra debug output: @INTERNAL_DEBUG_OUTPUT@ API tracing: @TRACE_API@ |