summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorLarry Knox <lrknox@hdfgroup.org>2019-07-25 16:36:37 (GMT)
committerLarry Knox <lrknox@hdfgroup.org>2019-07-25 16:47:12 (GMT)
commit8008294578b5a133907d7ab1dd20e34735c54535 (patch)
treed1b9228d468afc05da9333567ea43a04bb0c4272 /src
parentd3fdcd8a680ad0f8b21304b35e8564b774a88ef0 (diff)
downloadhdf5-8008294578b5a133907d7ab1dd20e34735c54535.zip
hdf5-8008294578b5a133907d7ab1dd20e34735c54535.tar.gz
hdf5-8008294578b5a133907d7ab1dd20e34735c54535.tar.bz2
Squashed commit of the following:
Merge changes from update_merged_S3_HDFS branch into develop. commit d5034315aea88629929ac0c9c59ebfafd5f21a31 Merge: 9c48823 d3fdcd8 Author: Larry Knox <lrknox@hdfgroup.org> Date: Thu Jul 25 08:24:53 2019 -0500 Merge branch 'develop' into update_merged_S3_HDFS
Diffstat (limited to 'src')
-rw-r--r--src/CMakeLists.txt7
-rw-r--r--src/H5FDhdfs.c2070
-rw-r--r--src/H5FDhdfs.h122
-rw-r--r--src/H5FDros3.c1847
-rw-r--r--src/H5FDros3.h105
-rw-r--r--src/H5FDs3comms.c3770
-rw-r--r--src/H5FDs3comms.h634
-rw-r--r--src/Makefile.am8
-rw-r--r--src/hdf5.h18
-rw-r--r--src/libhdf5.settings.in2
10 files changed, 8571 insertions, 12 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4106515..2b693bd 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -227,11 +227,14 @@ set (H5FD_SOURCES
${HDF5_SRC_DIR}/H5FDcore.c
${HDF5_SRC_DIR}/H5FDdirect.c
${HDF5_SRC_DIR}/H5FDfamily.c
+ ${HDF5_SRC_DIR}/H5FDhdfs.c
${HDF5_SRC_DIR}/H5FDint.c
${HDF5_SRC_DIR}/H5FDlog.c
${HDF5_SRC_DIR}/H5FDmpi.c
${HDF5_SRC_DIR}/H5FDmpio.c
${HDF5_SRC_DIR}/H5FDmulti.c
+ ${HDF5_SRC_DIR}/H5FDros3.c
+ ${HDF5_SRC_DIR}/H5FDs3comms.c
${HDF5_SRC_DIR}/H5FDsec2.c
${HDF5_SRC_DIR}/H5FDspace.c
${HDF5_SRC_DIR}/H5FDstdio.c
@@ -243,11 +246,14 @@ set (H5FD_HDRS
${HDF5_SRC_DIR}/H5FDcore.h
${HDF5_SRC_DIR}/H5FDdirect.h
${HDF5_SRC_DIR}/H5FDfamily.h
+ ${HDF5_SRC_DIR}/H5FDhdfs.h
${HDF5_SRC_DIR}/H5FDlog.h
${HDF5_SRC_DIR}/H5FDmpi.h
${HDF5_SRC_DIR}/H5FDmpio.h
${HDF5_SRC_DIR}/H5FDmulti.h
${HDF5_SRC_DIR}/H5FDpublic.h
+ ${HDF5_SRC_DIR}/H5FDros3.h
+ ${HDF5_SRC_DIR}/H5FDs3comms.h
${HDF5_SRC_DIR}/H5FDsec2.h
${HDF5_SRC_DIR}/H5FDstdio.h
${HDF5_SRC_DIR}/H5FDwindows.h
@@ -1142,6 +1148,7 @@ if (BUILD_SHARED_LIBS)
add_library (${HDF5_LIBSH_TARGET} SHARED ${common_SRCS} ${shared_gen_SRCS} ${H5_PUBLIC_HEADERS} ${H5_PRIVATE_HEADERS} ${H5_GENERATED_HEADERS})
target_include_directories (${HDF5_LIBSH_TARGET}
PRIVATE "${HDF5_SRC_DIR};${HDF5_BINARY_DIR};$<$<BOOL:${HDF5_ENABLE_PARALLEL}>:${MPI_C_INCLUDE_DIRS}>"
+ PUBLIC "$<$<BOOL:${HDF5_ENABLE_HDFS}>:${HDFS_INCLUDE_DIR}>"
INTERFACE "$<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>"
)
target_compile_definitions(${HDF5_LIBSH_TARGET}
diff --git a/src/H5FDhdfs.c b/src/H5FDhdfs.c
new file mode 100644
index 0000000..e3e11b2
--- /dev/null
+++ b/src/H5FDhdfs.c
@@ -0,0 +1,2070 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Read-Only HDFS Virtual File Driver (VFD) *
+ * Copyright (c) 2018, The HDF Group. *
+ * *
+ * All rights reserved. *
+ * *
+ * NOTICE: *
+ * All information contained herein is, and remains, the property of The HDF *
+ * Group. The intellectual and technical concepts contained herein are *
+ * proprietary to The HDF Group. Dissemination of this information or *
+ * reproduction of this material is strictly forbidden unless prior written *
+ * permission is obtained from The HDF Group. *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/*
+ * Programmer: Jacob Smith
+ * 2018-04-23
+ *
+ * Purpose: Provide read-only access to files on the Hadoop Distributed
+ * File System (HDFS).
+ */
+
+/* This source code file is part of the H5FD driver module */
+#include "H5FDdrvr_module.h"
+
+#include "H5private.h" /* Generic Functions */
+#include "H5Eprivate.h" /* Error handling */
+#include "H5FDprivate.h" /* File drivers */
+#include "H5FDhdfs.h" /* hdfs file driver */
+#include "H5FLprivate.h" /* Free Lists */
+#include "H5Iprivate.h" /* IDs */
+#include "H5MMprivate.h" /* Memory management */
+
+#ifdef H5_HAVE_LIBHDFS
+#include "hdfs.h"
+#endif
+
+/* toggle function call prints: 1 turns on */
+#define HDFS_DEBUG 0
+
+/* toggle stats collection and reporting */
+#define HDFS_STATS 0
+
+/* The driver identification number, initialized at runtime */
+static hid_t H5FD_HDFS_g = 0;
+
+#if HDFS_STATS
+
+/* arbitrarily large value, such that any reasonable size read will be "less"
+ * than this value and set a true minimum
+ * not 0 because that may be a valid recorded minimum in degenerate cases
+ */
+#define HDFS_STATS_STARTING_MIN 0xfffffffful
+
+/* Configuration definitions for stats collection and breakdown
+ *
+ * 2^10 = 1024
+ * Reads up to 1024 bytes (1 kB) fall in bin 0
+ * 2^(10+(1*16)) = 2^26 = 64MB
+ * Reads of 64MB or greater fall in "overflow" bin[BIN_COUNT]
+ */
+#define HDFS_STATS_BASE 2
+#define HDFS_STATS_INTERVAL 1
+#define HDFS_STATS_START_POWER 10
+#define HDFS_STATS_BIN_COUNT 16 /* MUST BE GREATER THAN 0 */
+
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Calculate `BASE ^ (START_POWER + (INTERVAL * bin_i))`
+ * Stores result at `(unsigned long long *) out_ptr`.
+ * Used in computing boundaries between stats bins.
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ */
+/* Expands to a bare `{ ... }` block rather than `do { } while (0)`, so a
+ * call site needs no trailing semicolon. `bin_i` is re-evaluated on every
+ * loop iteration and should therefore be free of side effects. The unusual
+ * local names are presumably chosen to avoid shadowing caller identifiers.
+ */
+#define HDFS_STATS_POW(bin_i, out_ptr) { \
+ unsigned long long donotshadowresult = 1; \
+ unsigned donotshadowindex = 0; \
+ for (donotshadowindex = 0; \
+ donotshadowindex < (((bin_i) * HDFS_STATS_INTERVAL) + \
+ HDFS_STATS_START_POWER); \
+ donotshadowindex++) \
+ { \
+ donotshadowresult *= HDFS_STATS_BASE; \
+ } \
+ *(out_ptr) = donotshadowresult; \
+}
+
+/* array to hold pre-computed boundaries for stats bins */
+static unsigned long long hdfs_stats_boundaries[HDFS_STATS_BIN_COUNT];
+
+
+/***************************************************************************
+ *
+ * Structure: hdfs_statsbin
+ *
+ * Purpose:
+ *
+ * Structure for storing per-file hdfs VFD usage statistics.
+ *
+ *
+ *
+ * `count` (unsigned long long)
+ *
+ * Number of reads with size in this bin's range.
+ *
+ * `bytes` (unsigned long long)
+ *
+ * Total number of bytes read through this bin.
+ *
+ * `min` (unsigned long long)
+ *
+ * Smallest read size in this bin.
+ *
+ * `max` (unsigned long long)
+ *
+ * Largest read size in this bin.
+ *
+ *
+ *
+ * Programmer: Jacob Smith
+ *
+ * Changes: None
+ *
+ ***************************************************************************/
+/* One statistics bin: aggregates the reads whose size falls in its range. */
+typedef struct {
+ unsigned long long count; /* number of reads with size in this bin's range */
+ unsigned long long bytes; /* total number of bytes read through this bin */
+ unsigned long long min; /* smallest read size in this bin */
+ unsigned long long max; /* largest read size in this bin */
+} hdfs_statsbin;
+
+#endif /* HDFS_STATS */
+
+/* "unique" identifier for `hdfs_t` structures.
+ * Randomly generated by unweighted dice rolls.
+ */
+#define HDFS_HDFST_MAGIC 0x1AD5DE84
+
+
+/***************************************************************************
+ *
+ * Structure: hdfs_t
+ *
+ * Purpose:
+ *
+ * Contain/retain information associated with a file hosted on Hadoop
+ * Distributed File System (HDFS). Instantiated and populated via
+ * `H5FD_hdfs_handle_open()` and cleaned up via `H5FD_hdfs_handle_close()`.
+ *
+ *
+ *
+ * `magic` (unsigned long)
+ *
+ * Number to indicate that this structure is of the promised
+ * type and should still be valid; should be HDFS_HDFST_MAGIC throughout
+ * the lifespan of the structure. Upon deletion of the structure, the
+ * programmer should set magic to anything but HDFS_HDFST_MAGIC, to
+ * indicate that the structure is to no longer be trusted.
+ *
+ * `filesystem` (hdfsFS)
+ *
+ * A libhdfs file system handle.
+ *
+ * `fileinfo` (hdfsFileInfo*)
+ *
+ * A pointer to a libhdfs file info structure.
+ *
+ * `file` (hdfsFile)
+ *
+ * A libhdfs file handle.
+ *
+ *
+ *
+ * Programmer: Jacob Smith
+ * May 2018
+ *
+ * Changes: None
+ *
+ ***************************************************************************
+ */
+/* Bundle of libhdfs objects for one opened file. The libhdfs members are
+ * compiled in only when H5_HAVE_LIBHDFS is defined.
+ */
+typedef struct {
+ unsigned long magic; /* HDFS_HDFST_MAGIC while the structure is valid */
+#ifdef H5_HAVE_LIBHDFS
+ hdfsFS filesystem; /* libhdfs file system handle */
+ hdfsFileInfo *fileinfo; /* libhdfs file info structure */
+ hdfsFile file; /* libhdfs file handle */
+#endif
+} hdfs_t;
+
+#ifdef H5_HAVE_LIBHDFS
+
+/*--------------------------------------------------------------------------
+ * Function: H5FD_hdfs_handle_open
+ *
+ * Purpose: Create a HDFS file handle, 'opening' the target file.
+ *
+ * Return: Success: Pointer to HDFS container/handle of opened file.
+ * Failure: NULL
+ *
+ * Programmer: Gerd Herber
+ * May 2018
+ *
+ * Changes: None.
+ *--------------------------------------------------------------------------
+ */
+static hdfs_t *
+H5FD_hdfs_handle_open(
+    const char    *path,
+    const char    *namenode_name,
+    const int32_t  namenode_port,
+    const char    *user_name,
+    const char    *kerberos_ticket_cache,
+    const int32_t  stream_buffer_size)
+{
+    hdfs_t             *ret_value = NULL;
+    hdfs_t             *handle    = NULL;
+    struct hdfsBuilder *builder   = NULL;
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#if HDFS_DEBUG
+    HDfprintf(stdout, "called H5FD_hdfs_handle_open.\n");
+#endif
+
+    /* Argument checks. Nothing is allocated yet, so a failure here skips
+     * the cleanup branch under `done` (handle is still NULL).
+     */
+    if (!path || '\0' == *path) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                    "path cannot be null.\n")
+    }
+    if (!namenode_name) { /* an empty name is not rejected here */
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                    "namenode name cannot be null.\n")
+    }
+    if (!(namenode_port >= 0 && namenode_port <= 65535)) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                    "namenode port must be non-negative and <= 65535.\n")
+    }
+    if (0 > stream_buffer_size) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                    "buffer size must non-negative.\n")
+    }
+
+    /* Allocate the container and put it in a known "nothing open" state so
+     * the cleanup code can tell which resources were acquired.
+     */
+    handle = (hdfs_t *)H5MM_malloc(sizeof(hdfs_t));
+    if (!handle) {
+        HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, NULL,
+                    "could not malloc space for handle.\n")
+    }
+    handle->magic      = (unsigned long)HDFS_HDFST_MAGIC;
+    handle->fileinfo   = NULL;
+    /* TODO (carried over): filesystem/file may not be pointer types; NULL
+     * may be the wrong sentinel for them.
+     */
+    handle->filesystem = NULL;
+    handle->file       = NULL;
+
+    /* Describe the connection through a libhdfs builder. */
+    builder = hdfsNewBuilder();
+    if (NULL == builder) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                    "(hdfs) failed to create builder")
+    }
+    hdfsBuilderSetNameNode(builder, namenode_name);
+    hdfsBuilderSetNameNodePort(builder, (tPort)namenode_port);
+    /* User name and ticket cache are optional: NULL/empty means "unset". */
+    if (user_name && *user_name) {
+        hdfsBuilderSetUserName(builder, user_name);
+    }
+    if (kerberos_ticket_cache && *kerberos_ticket_cache) {
+        hdfsBuilderSetKerbTicketCachePath(builder, kerberos_ticket_cache);
+    }
+
+    /* hdfsBuilderConnect() releases the builder whether or not it succeeds,
+     * so the builder is never freed in this function.
+     */
+    handle->filesystem = hdfsBuilderConnect(builder);
+    if (NULL == handle->filesystem) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                    "(hdfs) could not connect to default namenode")
+    }
+
+    handle->fileinfo = hdfsGetPathInfo(handle->filesystem, path);
+    if (NULL == handle->fileinfo) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                    "hdfsGetPathInfo failed")
+    }
+
+    handle->file = hdfsOpenFile(handle->filesystem, path, O_RDONLY,
+                                stream_buffer_size, 0, 0);
+    if (NULL == handle->file) {
+        HGOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, NULL,
+                    "(hdfs) could not open")
+    }
+
+    ret_value = handle;
+
+done:
+    /* On failure after allocation, release whatever was acquired: file,
+     * then file info, then the file system connection, then the container.
+     */
+    if (NULL == ret_value && NULL != handle) {
+        HDassert(handle->magic == HDFS_HDFST_MAGIC);
+        handle->magic++; /* mark the structure as no longer valid */
+
+        if (handle->file != NULL &&
+            FAIL == hdfsCloseFile(handle->filesystem, handle->file)) {
+            HDONE_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, NULL,
+                        "unable to close hdfs file handle")
+        }
+        if (handle->fileinfo != NULL) {
+            hdfsFreeFileInfo(handle->fileinfo, 1);
+        }
+        if (handle->filesystem != NULL &&
+            FAIL == hdfsDisconnect(handle->filesystem)) {
+            HDONE_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, NULL,
+                        "unable to disconnect from hdfs")
+        }
+        H5MM_xfree(handle);
+    }
+
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_hdfs_handle_open() */
+
+
+/*--------------------------------------------------------------------------
+ * Function: H5FD_hdfs_handle_close
+ *
+ * Purpose: 'Close' an HDFS file container/handle, releasing underlying
+ * resources.
+ *
+ * Return: Success: `SUCCEED` (0)
+ * Failure: `FAIL` (-1)
+ *
+ * Programmer: Gerd Herber
+ * May 2018
+ *
+ * Changes: None.
+ *--------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_hdfs_handle_close(hdfs_t *handle)
+{
+    herr_t ret_value = SUCCEED;
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#if HDFS_DEBUG
+    HDfprintf(stdout, "called H5FD_hdfs_close.\n");
+#endif
+
+    /* Refuse anything that is not a live hdfs_t. */
+    if (!handle) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "handle cannot be null.\n")
+    }
+    if (HDFS_HDFST_MAGIC != handle->magic) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "handle has invalid magic.\n")
+    }
+
+    /* Invalidate the magic first so the structure is no longer trusted. */
+    handle->magic++;
+
+    /* Release in order: file, file info, file system connection. A failed
+     * close or disconnect is recorded but does not stop the teardown.
+     */
+    if (handle->file != NULL &&
+        FAIL == hdfsCloseFile(handle->filesystem, handle->file)) {
+        HDONE_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL,
+                    "unable to close hdfs file handle")
+    }
+    if (handle->fileinfo != NULL) {
+        hdfsFreeFileInfo(handle->fileinfo, 1);
+    }
+    if (handle->filesystem != NULL &&
+        FAIL == hdfsDisconnect(handle->filesystem)) {
+        HDONE_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL,
+                    "unable to disconnect hdfs file system")
+    }
+
+    H5MM_xfree(handle);
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_hdfs_handle_close() */
+#endif /* H5_HAVE_LIBHDFS */
+
+
+/***************************************************************************
+ *
+ * Structure: H5FD_hdfs_t
+ *
+ * Purpose:
+ *
+ * H5FD_hdfs_t is a structure used to store all information needed to
+ * maintain R/O access to a single HDF5 file in an HDFS file system.
+ * This structure is created when such a file is "opened" and
+ * discarded when it is "closed".
+ *
+ *
+ * `pub` (H5FD_t)
+ *
+ * Instance of H5FD_t which contains all fields common to all VFDs.
+ * It must be the first item in this structure, since at higher levels,
+ * this structure will be treated as an instance of H5FD_t.
+ *
+ * `fa` (H5FD_hdfs_fapl_t)
+ *
+ * Instance of `H5FD_hdfs_fapl_t` containing the HDFS configuration data
+ * needed to "open" the HDF5 file.
+ *
+ * `eoa` (haddr_t)
+ *
+ * End of addressed space in file. After open, it should always
+ * equal the file size.
+ *
+ * `hdfs_handle` (hdfs_t *)
+ *
+ * Instance of HDFS Request handle associated with the target resource.
+ * Responsible for communicating with remote host and presenting file
+ * contents as indistinguishable from a file on the local filesystem.
+ *
+ * *** present only if HDFS_STATS is flagged to enable stats collection ***
+ *
+ * `meta` (hdfs_statsbin[])
+ * `raw` (hdfs_statsbin[])
+ *
+ * Only present if hdfs stats collection is enabled.
+ *
+ * Arrays of `hdfs_statsbin` structures to record raw- and metadata reads.
+ *
+ * Records count and size of reads performed by the VFD, and is used to
+ * print formatted usage statistics to stdout upon VFD shutdown.
+ *
+ * Reads of each raw- and metadata type are recorded in an individual bin
+ * determined by the size of the read. The last bin of each type is
+ * reserved for "big" reads, with no defined upper bound.
+ *
+ * *** end HDFS_STATS ***
+ *
+ *
+ *
+ * Programmer: Jacob Smith
+ *
+ * Changes: None.
+ *
+ ***************************************************************************
+ */
+/* Per-file state of the hdfs VFD; cast to/from H5FD_t by the VFD layer. */
+typedef struct H5FD_hdfs_t {
+ H5FD_t pub; /* fields common to all VFDs; must be the first member */
+ H5FD_hdfs_fapl_t fa; /* HDFS configuration used to open the file */
+ haddr_t eoa; /* end of addressed space in the file */
+#ifdef H5_HAVE_LIBHDFS
+ hdfs_t *hdfs_handle; /* libhdfs objects for the opened file */
+#endif
+#if HDFS_STATS
+ hdfs_statsbin meta[HDFS_STATS_BIN_COUNT + 1]; /* metadata-read bins, plus one extra bin */
+ hdfs_statsbin raw[HDFS_STATS_BIN_COUNT + 1]; /* raw-data-read bins, plus one extra bin */
+#endif
+} H5FD_hdfs_t;
+
+/*
+ * These macros check for overflow of various quantities. These macros
+ * assume that HDoff_t is signed and haddr_t and size_t are unsigned.
+ *
+ * ADDR_OVERFLOW: Checks whether a file address of type `haddr_t'
+ * is too large to be represented by the second argument
+ * of the file seek function.
+ *
+ */
+/* Largest value of a signed HDoff_t: 2^(bits in HDoff_t - 1) - 1. */
+#define MAXADDR (((haddr_t)1<<(8*sizeof(HDoff_t)-1))-1)
+/* Non-zero when A is HADDR_UNDEF or has any bit set above MAXADDR. */
+#define ADDR_OVERFLOW(A) (HADDR_UNDEF==(A) || ((A) & ~(haddr_t)MAXADDR))
+
+/* Prototypes */
+static herr_t H5FD_hdfs_term(void);
+static void *H5FD_hdfs_fapl_get(H5FD_t *_file);
+static void *H5FD_hdfs_fapl_copy(const void *_old_fa);
+static herr_t H5FD_hdfs_fapl_free(void *_fa);
+static H5FD_t *H5FD_hdfs_open(const char *name, unsigned flags, hid_t fapl_id,
+ haddr_t maxaddr);
+static herr_t H5FD_hdfs_close(H5FD_t *_file);
+static int H5FD_hdfs_cmp(const H5FD_t *_f1, const H5FD_t *_f2);
+static herr_t H5FD_hdfs_query(const H5FD_t *_f1, unsigned long *flags);
+static haddr_t H5FD_hdfs_get_eoa(const H5FD_t *_file, H5FD_mem_t type);
+static herr_t H5FD_hdfs_set_eoa(H5FD_t *_file, H5FD_mem_t type, haddr_t addr);
+static haddr_t H5FD_hdfs_get_eof(const H5FD_t *_file, H5FD_mem_t type);
+static herr_t H5FD_hdfs_get_handle(H5FD_t *_file, hid_t fapl,
+ void** file_handle);
+static herr_t H5FD_hdfs_read(H5FD_t *_file, H5FD_mem_t type, hid_t fapl_id,
+ haddr_t addr, size_t size, void *buf);
+static herr_t H5FD_hdfs_write(H5FD_t *_file, H5FD_mem_t type, hid_t fapl_id,
+ haddr_t addr, size_t size, const void *buf);
+static herr_t H5FD_hdfs_truncate(H5FD_t *_file, hid_t dxpl_id,
+ hbool_t closing);
+static herr_t H5FD_hdfs_lock(H5FD_t *_file, hbool_t rw);
+static herr_t H5FD_hdfs_unlock(H5FD_t *_file);
+static herr_t H5FD_hdfs_validate_config(const H5FD_hdfs_fapl_t * fa);
+
+/* Driver class table handed to H5FD_register() by H5FD_hdfs_init().
+ * NULL entries are callbacks this driver does not supply.
+ */
+static const H5FD_class_t H5FD_hdfs_g = {
+ "hdfs", /* name */
+ MAXADDR, /* maxaddr */
+ H5F_CLOSE_WEAK, /* fc_degree */
+ H5FD_hdfs_term, /* terminate */
+ NULL, /* sb_size */
+ NULL, /* sb_encode */
+ NULL, /* sb_decode */
+ sizeof(H5FD_hdfs_fapl_t), /* fapl_size */
+ H5FD_hdfs_fapl_get, /* fapl_get */
+ H5FD_hdfs_fapl_copy, /* fapl_copy */
+ H5FD_hdfs_fapl_free, /* fapl_free */
+ 0, /* dxpl_size */
+ NULL, /* dxpl_copy */
+ NULL, /* dxpl_free */
+ H5FD_hdfs_open, /* open */
+ H5FD_hdfs_close, /* close */
+ H5FD_hdfs_cmp, /* cmp */
+ H5FD_hdfs_query, /* query */
+ NULL, /* get_type_map */
+ NULL, /* alloc */
+ NULL, /* free */
+ H5FD_hdfs_get_eoa, /* get_eoa */
+ H5FD_hdfs_set_eoa, /* set_eoa */
+ H5FD_hdfs_get_eof, /* get_eof */
+ H5FD_hdfs_get_handle, /* get_handle */
+ H5FD_hdfs_read, /* read */
+ H5FD_hdfs_write, /* write */
+ NULL, /* flush */
+ H5FD_hdfs_truncate, /* truncate */
+ H5FD_hdfs_lock, /* lock */
+ H5FD_hdfs_unlock, /* unlock */
+ H5FD_FLMAP_DICHOTOMY /* fl_map */
+};
+
+/* Declare a free list to manage the H5FD_hdfs_t struct */
+H5FL_DEFINE_STATIC(H5FD_hdfs_t);
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD__init_package
+ *
+ * Purpose: Initializes any interface-specific data or routines.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Changes: Rename as appropriate for hdfs vfd.
+ * Jacob Smith 2018
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD__init_package(void)
+{
+    herr_t ret_value = SUCCEED;
+
+    FUNC_ENTER_STATIC
+
+    /* H5FD_hdfs_init() yields the driver ID; a negative value is failure. */
+    if (0 > H5FD_hdfs_init()) {
+        HGOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL,
+                    "unable to initialize hdfs VFD")
+    }
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD__init_package() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_hdfs_init
+ *
+ * Purpose: Initialize this driver by registering the driver with the
+ * library.
+ *
+ * Return: Success: The driver ID for the hdfs driver.
+ * Failure: Negative
+ *
+ * Programmer: Robb Matzke
+ * Thursday, July 29, 1999
+ *
+ * Changes: Rename as appropriate for hdfs vfd.
+ * Jacob Smith 2018
+ *
+ *-------------------------------------------------------------------------
+ */
+hid_t
+H5FD_hdfs_init(void)
+{
+    hid_t ret_value = H5I_INVALID_HID; /* Return value */
+
+    FUNC_ENTER_NOAPI(FAIL)
+
+#if HDFS_DEBUG
+    HDfprintf(stdout, "H5FD_hdfs_init() called.\n");
+#endif
+
+    /* Register the class only when the cached ID is not a VFL ID. */
+    if (H5I_get_type(H5FD_HDFS_g) != H5I_VFL) {
+        H5FD_HDFS_g = H5FD_register(&H5FD_hdfs_g, sizeof(H5FD_class_t), FALSE);
+    }
+
+#if HDFS_STATS
+    /* Pre-compute the boundary value of every stats bin. */
+    for (unsigned bin = 0; bin < HDFS_STATS_BIN_COUNT; bin++) {
+        unsigned long long boundary = 0;
+
+        HDFS_STATS_POW(bin, &boundary)
+        hdfs_stats_boundaries[bin] = boundary;
+    }
+#endif
+
+    /* Hand back whatever ID is now cached (the result of registration). */
+    ret_value = H5FD_HDFS_g;
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* end H5FD_hdfs_init() */
+
+
+/*---------------------------------------------------------------------------
+ * Function: H5FD_hdfs_term
+ *
+ * Purpose: Shut down the VFD
+ *
+ * Returns: SUCCEED (Can't fail)
+ *
+ * Programmer: Quincey Koziol
+ * Friday, Jan 30, 2004
+ *
+ * Changes: Rename as appropriate for hdfs vfd.
+ * Jacob Smith 2018
+ *
+ *---------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_hdfs_term(void)
+{
+    FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+#if HDFS_DEBUG
+    HDfprintf(stdout, "H5FD_hdfs_term() called.\n");
+#endif
+
+    /* Forget the cached driver ID so H5FD_hdfs_init() registers anew. */
+    H5FD_HDFS_g = 0;
+
+    FUNC_LEAVE_NOAPI(SUCCEED)
+
+} /* end H5FD_hdfs_term() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5Pset_fapl_hdfs
+ *
+ * Purpose: Modify the file access property list to use the H5FD_HDFS
+ * driver defined in this source file. All driver specific
+ * properties are passed in as a pointer to a suitably
+ * initialized instance of H5FD_hdfs_fapl_t
+ *
+ * Return: SUCCEED/FAIL
+ *
+ * Programmer: John Mainzer
+ * 9/10/17
+ *
+ * Changes: Rename as appropriate for hdfs vfd.
+ * Jacob Smith 2018
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5Pset_fapl_hdfs(hid_t fapl_id,
+                 H5FD_hdfs_fapl_t *fa)
+{
+    H5P_genplist_t *plist     = NULL; /* Property list pointer */
+    herr_t          ret_value = FAIL;
+
+    FUNC_ENTER_API(FAIL)
+    H5TRACE2("e", "i*x", fapl_id, fa);
+
+#if HDFS_DEBUG
+    HDfprintf(stdout, "H5Pset_fapl_hdfs() called.\n");
+#endif
+
+    /* This is a public entry point, so a NULL configuration is reported as
+     * an error instead of being guarded by an assertion only: with
+     * assertions compiled out, a NULL `fa` would otherwise be dereferenced
+     * by H5FD_hdfs_validate_config().
+     */
+    if (fa == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "fa is NULL")
+    }
+
+    /* fapl_id must identify a file access property list. */
+    plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS);
+    if (plist == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL,
+                    "not a file access property list")
+    }
+
+    /* Reject configurations that are not internally consistent. */
+    if (FAIL == H5FD_hdfs_validate_config(fa)) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "invalid hdfs config")
+    }
+
+    /* Install this driver, with `fa` as its driver info, on the list. */
+    ret_value = H5P_set_driver(plist, H5FD_HDFS, (void *)fa);
+
+done:
+    FUNC_LEAVE_API(ret_value)
+
+} /* H5Pset_fapl_hdfs() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_hdfs_validate_config()
+ *
+ * Purpose: Test to see if the supplied instance of H5FD_hdfs_fapl_t
+ * contains internally consistent data. Return SUCCEED if so,
+ * and FAIL otherwise.
+ *
+ * Note the difference between internally consistent and
+ * correct. As we will have to try to access the target
+ * object to determine whether the supplied data is correct,
+ * we will settle for internal consistency at this point.
+ *
+ * Return: SUCCEED if instance of H5FD_hdfs_fapl_t contains internally
+ * consistent data, FAIL otherwise.
+ *
+ * Programmer: Jacob Smith
+ * 9/10/17
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_hdfs_validate_config(const H5FD_hdfs_fapl_t * fa)
+{
+    herr_t ret_value = SUCCEED;
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+    HDassert(fa != NULL);
+
+    /* Only the current layout of the structure is understood. */
+    if (H5FD__CURR_HDFS_FAPL_T_VERSION != fa->version) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "Unknown H5FD_hdfs_fapl_t version");
+    }
+
+    /* The namenode port must lie in [0, 65535]. */
+    if (fa->namenode_port > 65535 || fa->namenode_port < 0) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "Invalid namenode port number");
+    }
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_hdfs_validate_config() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5Pget_fapl_hdfs
+ *
+ * Purpose: Returns information about the hdfs file access property
+ * list through the function arguments.
+ *
+ * Return: Success: Non-negative
+ *
+ * Failure: Negative
+ *
+ * Programmer: John Mainzer
+ * 9/10/17
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5Pget_fapl_hdfs(hid_t fapl_id,
+                 H5FD_hdfs_fapl_t *fa_out)
+{
+    H5P_genplist_t         *plist     = NULL;
+    const H5FD_hdfs_fapl_t *stored    = NULL;
+    herr_t                  ret_value = SUCCEED;
+
+    FUNC_ENTER_API(FAIL)
+    H5TRACE2("e", "i*x", fapl_id, fa_out);
+
+#if HDFS_DEBUG
+    HDfprintf(stdout, "H5Pget_fapl_hdfs() called.\n");
+#endif
+
+    /* The caller must supply somewhere to copy the configuration to. */
+    if (!fa_out) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "fa_out is NULL")
+    }
+
+    /* fapl_id must be a file access property list ... */
+    if (NULL == (plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS))) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL,
+                    "not a file access list")
+    }
+
+    /* ... whose driver is the hdfs driver ... */
+    if (H5P_peek_driver(plist) != H5FD_HDFS) {
+        HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL,
+                    "incorrect VFL driver")
+    }
+
+    /* ... and which carries driver info. */
+    stored = (const H5FD_hdfs_fapl_t *)H5P_peek_driver_info(plist);
+    if (!stored) {
+        HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL,
+                    "bad VFL driver info")
+    }
+
+    /* Give the caller a byte-for-byte copy of the stored configuration. */
+    HDmemcpy(fa_out, stored, sizeof(H5FD_hdfs_fapl_t));
+
+done:
+    FUNC_LEAVE_API(ret_value)
+
+} /* H5Pget_fapl_hdfs() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_hdfs_fapl_get
+ *
+ * Purpose: Gets a file access property list which could be used to
+ * create an identical file.
+ *
+ * Return: Success: Ptr to new file access property list value.
+ *
+ * Failure: NULL
+ *
+ * Programmer: John Mainzer
+ * 9/8/17
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static void *
+H5FD_hdfs_fapl_get(H5FD_t *_file)
+{
+    H5FD_hdfs_t      *file      = (H5FD_hdfs_t*)_file;
+    H5FD_hdfs_fapl_t *copy      = NULL;
+    void             *ret_value = NULL;
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+    /* Allocate a zeroed structure for the duplicate. */
+    if (NULL == (copy = (H5FD_hdfs_fapl_t *)H5MM_calloc(sizeof(H5FD_hdfs_fapl_t)))) {
+        HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL,
+                    "memory allocation failed")
+    }
+
+    /* Duplicate the configuration held in the open file's structure. */
+    HDmemcpy(copy, &(file->fa), sizeof(H5FD_hdfs_fapl_t));
+
+    ret_value = copy;
+
+done:
+    /* Release the allocation if it is not being returned. */
+    if (NULL == ret_value && NULL != copy) {
+        H5MM_xfree(copy);
+    }
+
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_hdfs_fapl_get() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_hdfs_fapl_copy
+ *
+ * Purpose: Copies the hdfs-specific file access properties.
+ *
+ * Return: Success: Ptr to a new property list
+ *
+ * Failure: NULL
+ *
+ * Programmer: John Mainzer
+ * 9/8/17
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static void *
+H5FD_hdfs_fapl_copy(const void *_old_fa)
+{
+    const H5FD_hdfs_fapl_t *src       = (const H5FD_hdfs_fapl_t*)_old_fa;
+    H5FD_hdfs_fapl_t       *dup       = NULL;
+    void                   *ret_value = NULL;
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+    /* Allocate room for the duplicate. */
+    if (NULL == (dup = (H5FD_hdfs_fapl_t *)H5MM_malloc(sizeof(H5FD_hdfs_fapl_t)))) {
+        HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL,
+                    "memory allocation failed")
+    }
+
+    /* The structure is copied wholesale, byte for byte. */
+    HDmemcpy(dup, src, sizeof(H5FD_hdfs_fapl_t));
+    ret_value = dup;
+
+done:
+    /* Release the allocation if it is not being returned. */
+    if (NULL == ret_value && NULL != dup) {
+        H5MM_xfree(dup);
+    }
+
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_hdfs_fapl_copy() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_hdfs_fapl_free
+ *
+ * Purpose: Frees the hdfs-specific file access properties.
+ *
+ * Return: SUCCEED (cannot fail)
+ *
+ * Programmer: John Mainzer
+ * 9/8/17
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_hdfs_fapl_free(void *_fa)
+{
+    H5FD_hdfs_fapl_t *doomed = (H5FD_hdfs_fapl_t*)_fa;
+
+    FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+    /* Callers are expected to pass a real structure. */
+    HDassert(doomed != NULL);
+
+    /* The structure is released as a single allocation. */
+    H5MM_xfree(doomed);
+
+    FUNC_LEAVE_NOAPI(SUCCEED)
+
+} /* H5FD_hdfs_fapl_free() */
+
+#if HDFS_STATS
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: hdfs_reset_stats()
+ *
+ * Purpose:
+ *
+ * Reset the stats collection elements in this virtual file structure.
+ *
+ * Clears any set data in stats bins; initializes/zeroes values.
+ *
+ * Return:
+ *
+ * - SUCCESS: `SUCCEED`
+ * - FAILURE: `FAIL`
+ * - Occurs if the file is invalid somehow
+ *
+ * Programmer: Jacob Smith
+ * 2017-12-08
+ *
+ * Changes: None.
+ *
+ *----------------------------------------------------------------------------
+ */
+static herr_t
+hdfs_reset_stats(H5FD_hdfs_t *file)
+{
+    unsigned bin       = 0;
+    herr_t   ret_value = SUCCEED;
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#if HDFS_DEBUG
+    HDprintf("hdfs_reset_stats() called\n");
+#endif
+
+    if (!file) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "file was null")
+    }
+
+    /* Visit every bin, including the extra one at index
+     * HDFS_STATS_BIN_COUNT, in both the raw and the meta arrays.
+     */
+    for (bin = 0; bin <= HDFS_STATS_BIN_COUNT; bin++) {
+        hdfs_statsbin *raw_bin  = &file->raw[bin];
+        hdfs_statsbin *meta_bin = &file->meta[bin];
+
+        /* `min` starts at a large sentinel so any recorded read lowers it. */
+        raw_bin->bytes = 0;
+        raw_bin->count = 0;
+        raw_bin->min   = (unsigned long long)HDFS_STATS_STARTING_MIN;
+        raw_bin->max   = 0;
+
+        meta_bin->bytes = 0;
+        meta_bin->count = 0;
+        meta_bin->min   = (unsigned long long)HDFS_STATS_STARTING_MIN;
+        meta_bin->max   = 0;
+    }
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value);
+
+} /* hdfs_reset_stats */
+#endif /* HDFS_STATS */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_hdfs_open()
+ *
+ * Purpose:
+ *
+ * Opens an existing file as an HDF5 file (read-only; nothing is created).
+ *
+ * Any flag except H5F_ACC_RDONLY will cause an error.
+ *
+ * Return:
+ *
+ * Success: A pointer to a new file data structure.
+ * The public fields will be initialized by the caller, which is
+ * always H5FD_open().
+ *
+ * Failure: NULL
+ *
+ * Programmer: Jacob Smith
+ * 2017-11-02
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static H5FD_t *
+H5FD_hdfs_open(
+ const char *path,
+ unsigned flags,
+ hid_t fapl_id,
+ haddr_t maxaddr)
+{
+ /* Opens `path` through libhdfs and wraps it in a new H5FD_hdfs_t.
+ * `flags` must be exactly H5F_ACC_RDONLY and `fapl_id` must not be a
+ * default property list. `maxaddr` is only range-checked here.
+ */
+ H5FD_t *ret_value = NULL;
+#ifdef H5_HAVE_LIBHDFS
+ H5FD_hdfs_t *file = NULL;
+ hdfs_t *handle = NULL;
+ H5FD_hdfs_fapl_t fa;
+#endif
+
+ FUNC_ENTER_NOAPI_NOINIT
+
+#ifndef H5_HAVE_LIBHDFS
+ /* Built without libhdfs: every open attempt fails as unsupported. */
+ HGOTO_ERROR(H5E_VFL, H5E_UNSUPPORTED, NULL,
+ "Illegal open of unsupported virtual file (hdfs)");
+#else
+#if HDFS_DEBUG
+ HDfprintf(stdout, "H5FD_hdfs_open() called.\n");
+#endif /* HDFS_DEBUG */
+
+ /* Sanity check on file offsets */
+ HDcompile_assert(sizeof(HDoff_t) >= sizeof(size_t));
+
+ /* Check arguments */
+ if (!path || !*path) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+ "invalid file name")
+ }
+ if (0 == maxaddr || HADDR_UNDEF == maxaddr) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADRANGE, NULL,
+ "bogus maxaddr")
+ }
+ if (ADDR_OVERFLOW(maxaddr)) {
+ HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, NULL,
+ "bogus maxaddr")
+ }
+ if (flags != H5F_ACC_RDONLY) {
+ HGOTO_ERROR(H5E_ARGS, H5E_UNSUPPORTED, NULL,
+ "only Read-Only access allowed")
+ }
+ if (fapl_id == H5P_DEFAULT || fapl_id == H5P_FILE_ACCESS_DEFAULT) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+ "fapl cannot be H5P_DEFAULT")
+ }
+ /* Fetch a local copy of the driver configuration stored on the fapl.
+ * NOTE(review): H5Pget_fapl_hdfs() is the public API routine defined in
+ * this file (it uses FUNC_ENTER_API); confirm that calling it from
+ * inside the library is intended.
+ */
+ if (FAIL == H5Pget_fapl_hdfs(fapl_id, &fa)) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+ "can't get property list")
+ }
+
+ /* Connect to the namenode and open the file through libhdfs. */
+ handle = H5FD_hdfs_handle_open(
+ path,
+ fa.namenode_name,
+ fa.namenode_port,
+ fa.user_name,
+ fa.kerberos_ticket_cache,
+ fa.stream_buffer_size);
+
+ if (handle == NULL) {
+ HGOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, NULL,
+ "could not open")
+ }
+
+ HDassert(handle->magic == HDFS_HDFST_MAGIC);
+
+ /* create new file struct
+ */
+ file = H5FL_CALLOC(H5FD_hdfs_t);
+ if (file == NULL) {
+ HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL,
+ "unable to allocate file struct")
+ }
+ /* The file struct keeps the libhdfs handle and its own copy of the
+ * configuration.
+ */
+ file->hdfs_handle = handle;
+ HDmemcpy(&(file->fa), &fa, sizeof(H5FD_hdfs_fapl_t));
+
+#if HDFS_STATS
+ if (FAIL == hdfs_reset_stats(file)) {
+ HGOTO_ERROR(H5E_INTERNAL, H5E_UNINITIALIZED, NULL,
+ "unable to reset file statistics")
+ }
+#endif /* HDFS_STATS */
+
+ ret_value = (H5FD_t*)file;
+#endif /* H5_HAVE_LIBHDFS */
+
+done:
+#ifdef H5_HAVE_LIBHDFS
+ /* On failure, undo whatever was acquired: close the libhdfs handle and
+ * return the file struct to its free list.
+ */
+ if (ret_value == NULL) {
+ if (handle != NULL) {
+ if (FAIL == H5FD_hdfs_handle_close(handle)) {
+ HDONE_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, NULL,
+ "unable to close HDFS file handle")
+ }
+ }
+ if (file != NULL) {
+ file = H5FL_FREE(H5FD_hdfs_t, file);
+ }
+ } /* if null return value (error) */
+#endif /* H5_HAVE_LIBHDFS */
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_hdfs_open() */
+
+#if HDFS_STATS
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: hdfs_fprint_stats()
+ *
+ * Purpose:
+ *
+ * Tabulate and pretty-print statistics for this virtual file.
+ *
+ * Should be called upon file close.
+ *
+ * Shows number of reads and bytes read, broken down by
+ * "raw" (H5FD_MEM_DRAW)
+ * or "meta" (any other flag)
+ *
+ * Prints filename and listing of total number of reads and bytes read,
+ * both as a grand total and separate meta- and rawdata reads.
+ *
+ * If any reads were done, prints out two tables:
+ *
+ * 1. overview of raw- and metadata reads
+ * - min (smallest size read)
+ * - average of size read
+ * - k,M,G suffixes by powers of 1024 (2^10)
+ * - max (largest size read)
+ * 2. tabulation of "bins", separating reads into exponentially-larger
+ * ranges of size.
+ * - columns for number of reads, total bytes, and average size, with
+ * separate sub-columns for raw- and metadata reads.
+ * - each row represents one bin, identified by the top of its range
+ *
+ * Bin ranges can be modified with pound-defines at the top of this file.
+ *
+ * Bins without any reads in their bounds are not printed.
+ *
+ * An "overflow" bin is also present, to catch "big" reads.
+ *
+ * Output for all bins (and range ceiling and average size report)
+ * is divided by powers of 1024. As a consequence, up to four digits before
+ * the decimal point are valid.
+ *
+ * - 41080 bytes is represented by 40.177k, not 41.080k
+ * - 1004.831M represents approx. 1052642000 bytes
+ *
+ * Return:
+ *
+ * - SUCCESS: `SUCCEED`
+ * - FAILURE: `FAIL`
+ * - occurs if the file passed in is invalid
+ * - TODO: if stream is invalid? how can we check this?
+ *
+ * Programmer: Jacob Smith
+ *
+ * Changes: None.
+ *
+ *----------------------------------------------------------------------------
+ */
+/* Scale `value` down by successive factors of 1024 until it is below 1024
+ * (or the largest known suffix has been reached), store the matching unit
+ * suffix (' ', 'K', 'M', 'G', 'T', 'P') in `*suffix_out`, and return the
+ * scaled value.
+ */
+static double
+hdfs_stats_scale(double value, char *suffix_out)
+{
+    static const char suffixes[] = { ' ', 'K', 'M', 'G', 'T', 'P' };
+    unsigned          suffix_i   = 0;
+
+    /* Stop at the last suffix so the table is never indexed out of bounds,
+     * even in builds where assertions are compiled out.
+     */
+    while (value >= 1024.0 && (suffix_i + 1) < sizeof(suffixes)) {
+        value /= 1024.0;
+        suffix_i++;
+    }
+    HDassert(suffix_i < sizeof(suffixes));
+    *suffix_out = suffixes[suffix_i];
+
+    return value;
+} /* hdfs_stats_scale */
+
+/* Print the read statistics collected in `file` to `stream`: grand totals,
+ * then (if any read happened) min/avg/max sizes and one row per non-empty
+ * bin, split into metadata and raw-data columns.
+ *
+ * Returns SUCCEED, or FAIL if `stream`, `file`, or the file's HDFS handle
+ * is NULL or the handle's magic number is wrong.
+ */
+static herr_t
+hdfs_fprint_stats(
+        FILE              *stream,
+        const H5FD_hdfs_t *file)
+{
+    herr_t             ret_value    = SUCCEED;
+    unsigned           i            = 0;
+    unsigned long long count_meta   = 0;
+    unsigned long long count_raw    = 0;
+    double             average_meta = 0.0;
+    double             average_raw  = 0.0;
+    unsigned long long min_meta = (unsigned long long)HDFS_STATS_STARTING_MIN;
+    unsigned long long min_raw  = (unsigned long long)HDFS_STATS_STARTING_MIN;
+    unsigned long long max_meta     = 0;
+    unsigned long long max_raw      = 0;
+    unsigned long long bytes_raw    = 0;
+    unsigned long long bytes_meta   = 0;
+    double             re_dub       = 0.0; /* re-usable double variable */
+    char               suffix       = ' '; /* re-usable unit suffix */
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+    if (stream == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "file stream cannot be null")
+    }
+    if (file == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "file cannot be null")
+    }
+    if (file->hdfs_handle == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "hdfs handle cannot be null")
+    }
+    if (file->hdfs_handle->magic != HDFS_HDFST_MAGIC) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "hdfs handle has invalid magic")
+    }
+
+    /* TODO: See what libhdfs exposes to us, so that the file name can be
+     *       printed as a heading for the statistics.
+     */
+
+    /*******************
+     * AGGREGATE STATS *
+     *******************/
+
+    /* `<=`: the last index is the "overflow" bin */
+    for (i = 0; i <= HDFS_STATS_BIN_COUNT; i++) {
+        const hdfs_statsbin *r = &file->raw[i];
+        const hdfs_statsbin *m = &file->meta[i];
+
+        if (m->min < min_meta) min_meta = m->min;
+        if (r->min < min_raw)  min_raw  = r->min;
+        if (m->max > max_meta) max_meta = m->max;
+        if (r->max > max_raw)  max_raw  = r->max;
+
+        count_raw  += r->count;
+        count_meta += m->count;
+        bytes_raw  += r->bytes;
+        bytes_meta += m->bytes;
+    }
+    if (count_raw > 0) {
+        average_raw = (double)bytes_raw / (double)count_raw;
+    }
+    if (count_meta > 0) {
+        average_meta = (double)bytes_meta / (double)count_meta;
+    }
+
+    /******************
+     * PRINT OVERVIEW *
+     ******************/
+
+    /* all arguments are `unsigned long long`, matching `%llu` */
+    HDfprintf(stream, "TOTAL READS: %llu (%llu meta, %llu raw)\n",
+              count_raw + count_meta, count_meta, count_raw);
+    HDfprintf(stream, "TOTAL BYTES: %llu (%llu meta, %llu raw)\n",
+              bytes_raw + bytes_meta, bytes_meta, bytes_raw);
+
+    /* nothing further to tabulate if no read was recorded */
+    if (count_raw + count_meta == 0) {
+        HGOTO_DONE(SUCCEED)
+    }
+
+    /*************************
+     * PRINT AGGREGATE STATS *
+     *************************/
+
+    HDfprintf(stream, "SIZES meta raw\n");
+    HDfprintf(stream, " min ");
+    if (count_meta == 0) {
+        HDfprintf(stream, " 0.000 ");
+    } else {
+        re_dub = hdfs_stats_scale((double)min_meta, &suffix);
+        HDfprintf(stream, "%8.3lf%c ", re_dub, suffix);
+    }
+
+    if (count_raw == 0) {
+        HDfprintf(stream, " 0.000 \n");
+    } else {
+        re_dub = hdfs_stats_scale((double)min_raw, &suffix);
+        HDfprintf(stream, "%8.3lf%c\n", re_dub, suffix);
+    }
+
+    HDfprintf(stream, " avg ");
+    re_dub = hdfs_stats_scale(average_meta, &suffix);
+    HDfprintf(stream, "%8.3lf%c ", re_dub, suffix);
+
+    re_dub = hdfs_stats_scale(average_raw, &suffix);
+    HDfprintf(stream, "%8.3lf%c\n", re_dub, suffix);
+
+    HDfprintf(stream, " max ");
+    re_dub = hdfs_stats_scale((double)max_meta, &suffix);
+    HDfprintf(stream, "%8.3lf%c ", re_dub, suffix);
+
+    re_dub = hdfs_stats_scale((double)max_raw, &suffix);
+    HDfprintf(stream, "%8.3lf%c\n", re_dub, suffix);
+
+    /******************************
+     * PRINT INDIVIDUAL BIN STATS *
+     ******************************/
+
+    HDfprintf(stream,
+              "BINS # of reads total bytes average size\n");
+    HDfprintf(stream,
+              " up-to meta raw meta raw meta raw\n");
+
+    for (i = 0; i <= HDFS_STATS_BIN_COUNT; i++) {
+        const hdfs_statsbin *m         = &file->meta[i];
+        const hdfs_statsbin *r         = &file->raw[i];
+        unsigned long long   range_end = 0;
+        char                 bm_suffix = ' '; /* bytes-meta */
+        double               bm_val    = 0.0;
+        char                 br_suffix = ' '; /* bytes-raw */
+        double               br_val    = 0.0;
+        char                 am_suffix = ' '; /* average-meta */
+        double               am_val    = 0.0;
+        char                 ar_suffix = ' '; /* average-raw */
+        double               ar_val    = 0.0;
+
+        /* bins without any reads are not printed */
+        if (r->count == 0 && m->count == 0) {
+            continue;
+        }
+
+        /* The overflow bin has no boundary of its own; report it as
+         * "greater than" the last real boundary. The boundary table is
+         * only indexed with `i` for the regular bins.
+         */
+        if (i == HDFS_STATS_BIN_COUNT) {
+            range_end = hdfs_stats_boundaries[i - 1];
+            HDfprintf(stream, ">");
+        } else {
+            range_end = hdfs_stats_boundaries[i];
+            HDfprintf(stream, " ");
+        }
+
+        bm_val = hdfs_stats_scale((double)m->bytes, &bm_suffix);
+        br_val = hdfs_stats_scale((double)r->bytes, &br_suffix);
+
+        if (m->count > 0) {
+            am_val = (double)(m->bytes) / (double)(m->count);
+        }
+        am_val = hdfs_stats_scale(am_val, &am_suffix);
+
+        if (r->count > 0) {
+            ar_val = (double)(r->bytes) / (double)(r->count);
+        }
+        ar_val = hdfs_stats_scale(ar_val, &ar_suffix);
+
+        re_dub = hdfs_stats_scale((double)range_end, &suffix);
+
+        /* counts are cast explicitly so they always match `%llu` */
+        HDfprintf(
+                stream,
+                " %8.3f%c %7llu %7llu %8.3f%c %8.3f%c %8.3f%c %8.3f%c\n",
+                re_dub, suffix,               /* bin ceiling      */
+                (unsigned long long)m->count, /* metadata reads   */
+                (unsigned long long)r->count, /* rawdata reads    */
+                bm_val, bm_suffix,            /* metadata bytes   */
+                br_val, br_suffix,            /* rawdata bytes    */
+                am_val, am_suffix,            /* metadata average */
+                ar_val, ar_suffix);           /* rawdata average  */
+        fflush(stream);
+    }
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* hdfs_fprint_stats */
+#endif /* HDFS_STATS */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_hdfs_close()
+ *
+ * Purpose:
+ *
+ * Close an HDF5 file.
+ *
+ * Return:
+ *
+ * SUCCEED/FAIL
+ *
+ * Programmer: Jacob Smith
+ * 2017-11-02
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_hdfs_close(H5FD_t *_file)
+{
+    herr_t       ret_value = SUCCEED;
+#ifdef H5_HAVE_LIBHDFS
+    H5FD_hdfs_t *file      = (H5FD_hdfs_t *)_file;
+#endif
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#ifndef H5_HAVE_LIBHDFS
+    HGOTO_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL,
+                "Illegal close of unsupported Virtual File (hdfs)")
+#else
+#if HDFS_DEBUG
+    HDfprintf(stdout, "H5FD_hdfs_close() called.\n");
+#endif
+
+    /* Sanity checks
+     */
+    HDassert(file != NULL);
+    HDassert(file->hdfs_handle != NULL);
+    HDassert(file->hdfs_handle->magic == HDFS_HDFST_MAGIC);
+
+#if HDFS_STATS
+    /* Report statistics BEFORE closing the handle: hdfs_fprint_stats()
+     * inspects file->hdfs_handle (NULL and magic checks), so the handle
+     * must still be valid at this point.
+     */
+    /* TODO: mechanism to re-target stats printout */
+    if (FAIL == hdfs_fprint_stats(stdout, file)) {
+        HGOTO_ERROR(H5E_INTERNAL, H5E_ERROR, FAIL,
+                    "problem while writing file statistics")
+    }
+#endif /* HDFS_STATS */
+
+    /* Close the underlying request handle
+     */
+    if (file->hdfs_handle != NULL) {
+        if (FAIL == H5FD_hdfs_handle_close(file->hdfs_handle)) {
+            HGOTO_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL,
+                        "unable to close HDFS file handle")
+        }
+    }
+
+    /* Release the file info
+     */
+    file = H5FL_FREE(H5FD_hdfs_t, file);
+#endif /* H5_HAVE_LIBHDFS */
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* end H5FD_hdfs_close() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_hdfs_cmp()
+ *
+ * Purpose:
+ *
+ * Compares two files using this driver by their HDFS-provided file info,
+ * field-by-field.
+ *
+ * Return:
+ * + Equivalent: 0
+ * + Not Equivalent: -1
+ *
+ * Programmer: Gerd Herber
+ * May 2018
+ *
+ * Changes:
+ *
+ * + Replace `if (ret_value == 0)` chain with `HGOTO_DONE` jumps.
+ * Jacob Smith 17 May 2018
+ *
+ *-------------------------------------------------------------------------
+ */
+static int
+H5FD_hdfs_cmp(const H5FD_t *_f1,
+              const H5FD_t *_f2)
+{
+    int                ret_value = 0;
+#ifdef H5_HAVE_LIBHDFS
+    const H5FD_hdfs_t *f1        = (const H5FD_hdfs_t *)_f1;
+    const H5FD_hdfs_t *f2        = (const H5FD_hdfs_t *)_f2;
+    hdfsFileInfo      *finfo1    = NULL;
+    hdfsFileInfo      *finfo2    = NULL;
+#endif /* H5_HAVE_LIBHDFS */
+
+    FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+#ifdef H5_HAVE_LIBHDFS
+#if HDFS_DEBUG
+    HDfprintf(stdout, "H5FD_hdfs_cmp() called.\n");
+#endif /* HDFS_DEBUG */
+
+    HDassert(f1->hdfs_handle != NULL);
+    HDassert(f2->hdfs_handle != NULL);
+    HDassert(f1->hdfs_handle->magic == HDFS_HDFST_MAGIC);
+    HDassert(f2->hdfs_handle->magic == HDFS_HDFST_MAGIC);
+
+    finfo1 = f1->hdfs_handle->fileinfo;
+    finfo2 = f2->hdfs_handle->fileinfo;
+    HDassert(finfo1 != NULL);
+    HDassert(finfo2 != NULL);
+
+    /* Field-by-field comparison; the first mismatch ends the comparison.
+     * String fields (name, owner, group) are compared by content: each
+     * handle owns its own copy of the file info, so comparing the pointers
+     * would report two handles on the same file as different.
+     */
+    if (finfo1->mKind != finfo2->mKind)               HGOTO_DONE(-1);
+    if (0 != strcmp(finfo1->mName, finfo2->mName))    HGOTO_DONE(-1);
+    if (finfo1->mLastMod != finfo2->mLastMod)         HGOTO_DONE(-1);
+    if (finfo1->mSize != finfo2->mSize)               HGOTO_DONE(-1);
+    if (finfo1->mReplication != finfo2->mReplication) HGOTO_DONE(-1);
+    if (finfo1->mBlockSize != finfo2->mBlockSize)     HGOTO_DONE(-1);
+    if (0 != strcmp(finfo1->mOwner, finfo2->mOwner))  HGOTO_DONE(-1);
+    if (0 != strcmp(finfo1->mGroup, finfo2->mGroup))  HGOTO_DONE(-1);
+    if (finfo1->mPermissions != finfo2->mPermissions) HGOTO_DONE(-1);
+    if (finfo1->mLastAccess != finfo2->mLastAccess)   HGOTO_DONE(-1);
+#endif /* H5_HAVE_LIBHDFS */
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_hdfs_cmp() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_hdfs_query
+ *
+ * Purpose: Set the flags that this VFL driver is capable of supporting.
+ * (listed in H5FDpublic.h)
+ *
+ * Note that since the HDFS VFD is read only, most flags
+ * are irrelevant.
+ *
+ * The term "set" is highly misleading...
+ * stores/copies the supported flags in the out-pointer `flags`.
+ *
+ * Return: SUCCEED (Can't fail)
+ *
+ * Programmer: John Mainzer
+ * 9/11/17
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_hdfs_query(
+        const H5FD_t H5_ATTR_UNUSED *_file,
+        unsigned long               *flags) /* out variable */
+{
+    FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+#if HDFS_DEBUG
+    HDfprintf(stdout, "H5FD_hdfs_query() called.\n");
+#endif
+
+    /* Report the feature set only when the caller supplied somewhere to
+     * store it; data sieving is the single feature advertised.
+     */
+    if (flags != NULL) {
+        *flags = H5FD_FEAT_DATA_SIEVE;
+    }
+
+    FUNC_LEAVE_NOAPI(SUCCEED)
+
+} /* H5FD_hdfs_query() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_hdfs_get_eoa()
+ *
+ * Purpose:
+ *
+ * Gets the end-of-address marker for the file. The EOA marker
+ * is the first address past the last byte allocated in the
+ * format address space.
+ *
+ * Return:
+ *
+ * The end-of-address marker.
+ *
+ * Programmer: Jacob Smith
+ * 2017-11-02
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static haddr_t
+H5FD_hdfs_get_eoa(
+        const H5FD_t              *_file,
+        H5FD_mem_t H5_ATTR_UNUSED  type)
+{
+    /* 0 is what gets reported when the library is built without libhdfs */
+    haddr_t            ret_value = 0;
+#ifdef H5_HAVE_LIBHDFS
+    const H5FD_hdfs_t *file      = (const H5FD_hdfs_t *)_file;
+#endif /* H5_HAVE_LIBHDFS */
+
+    FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+#if HDFS_DEBUG
+    HDfprintf(stdout, "H5FD_hdfs_get_eoa() called.\n");
+#endif
+
+#ifdef H5_HAVE_LIBHDFS
+    /* hand back the marker stored in the driver's file struct */
+    ret_value = file->eoa;
+#endif /* H5_HAVE_LIBHDFS */
+
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* end H5FD_hdfs_get_eoa() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_hdfs_set_eoa()
+ *
+ * Purpose:
+ *
+ * Set the end-of-address marker for the file.
+ *
+ * Return:
+ *
+ * SUCCEED (can't fail)
+ *
+ * Programmer: Jacob Smith
+ * 2017-11-03
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_hdfs_set_eoa(
+        H5FD_t                    *_file,
+        H5FD_mem_t H5_ATTR_UNUSED  type,
+        haddr_t                    addr)
+{
+    /* FAIL is what gets reported when built without libhdfs */
+    herr_t       ret_value = FAIL;
+#ifdef H5_HAVE_LIBHDFS
+    H5FD_hdfs_t *file      = (H5FD_hdfs_t *)_file;
+#endif /* H5_HAVE_LIBHDFS */
+
+    FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+#if HDFS_DEBUG
+    HDfprintf(stdout, "H5FD_hdfs_set_eoa() called.\n");
+#endif
+
+#ifdef H5_HAVE_LIBHDFS
+    /* remember the new marker in the driver's file struct */
+    file->eoa = addr;
+    ret_value = SUCCEED;
+#endif /* H5_HAVE_LIBHDFS */
+
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_hdfs_set_eoa() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_hdfs_get_eof()
+ *
+ * Purpose:
+ *
+ * Returns the end-of-file marker.
+ *
+ * Return:
+ *
+ * EOF: the first address past the end of the "file", either the
+ * filesystem file or the HDF5 file.
+ *
+ * Programmer: Jacob Smith
+ * 2017-11-02
+ *
+ *-------------------------------------------------------------------------
+ */
+static haddr_t
+H5FD_hdfs_get_eof(
+        const H5FD_t              *_file,
+        H5FD_mem_t H5_ATTR_UNUSED  type)
+{
+    /* 0 is what gets reported when the library is built without libhdfs */
+    haddr_t            ret_value = 0;
+#ifdef H5_HAVE_LIBHDFS
+    const H5FD_hdfs_t *file      = (const H5FD_hdfs_t *)_file;
+#endif /* H5_HAVE_LIBHDFS */
+
+    FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+#if HDFS_DEBUG
+    HDfprintf(stdout, "H5FD_hdfs_get_eof() called.\n");
+#endif
+
+#ifdef H5_HAVE_LIBHDFS
+    HDassert(file->hdfs_handle != NULL);
+    HDassert(file->hdfs_handle->magic == HDFS_HDFST_MAGIC);
+
+    /* The end-of-file is the size recorded in the HDFS file info. Convert
+     * straight to the return type: going through `size_t` would truncate
+     * large file sizes on platforms where `size_t` is narrower than
+     * `haddr_t`.
+     */
+    ret_value = (haddr_t)file->hdfs_handle->fileinfo->mSize;
+#endif /* H5_HAVE_LIBHDFS */
+
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* end H5FD_hdfs_get_eof() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_hdfs_get_handle()
+ *
+ * Purpose:
+ *
+ * Returns the HDFS handle (hdfs_t) of hdfs file driver.
+ *
+ * Returns:
+ *
+ * SUCCEED/FAIL
+ *
+ * Programmer: Jacob Smith
+ * 2017-11-02
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_hdfs_get_handle(
+        H5FD_t                *_file,
+        hid_t H5_ATTR_UNUSED   fapl,
+        void                 **file_handle)
+{
+    herr_t       ret_value = SUCCEED;
+#ifdef H5_HAVE_LIBHDFS
+    H5FD_hdfs_t *file      = (H5FD_hdfs_t *)_file;
+#endif
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#if HDFS_DEBUG
+    HDfprintf(stdout, "H5FD_hdfs_get_handle() called.\n");
+#endif /* HDFS_DEBUG */
+
+#ifndef H5_HAVE_LIBHDFS
+    /* no libhdfs support compiled in: there is no handle to hand out */
+    HGOTO_ERROR(H5E_VFL, H5E_UNSUPPORTED, FAIL,
+                "Illegal get-handle of unsupported virtual file (hdfs)")
+#else
+    /* the caller must supply a place to store the handle */
+    if (NULL == file_handle) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "file handle not valid")
+    }
+
+    *file_handle = file->hdfs_handle;
+#endif /* H5_HAVE_LIBHDFS */
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* end H5FD_hdfs_get_handle() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_hdfs_read()
+ *
+ * Purpose:
+ *
+ * Reads SIZE bytes of data from FILE beginning at address ADDR
+ * into buffer BUF according to data transfer properties in DXPL_ID.
+ *
+ * Return:
+ *
+ * Success: `SUCCEED`
+ * - Result is stored in caller-supplied buffer BUF.
+ * Failure: `FAIL`
+ * - Unable to complete read.
+ * - Contents of buffer `buf` are undefined.
+ *
+ * Programmer: Jacob Smith
+ * 2017-11-??
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_hdfs_read(
+        H5FD_t                    *_file,
+        H5FD_mem_t H5_ATTR_UNUSED  type,
+        hid_t      H5_ATTR_UNUSED  dxpl_id,
+        haddr_t                    addr, /* start offset */
+        size_t                     size, /* length of read */
+        void                      *buf)  /* out */
+{
+    herr_t         ret_value = SUCCEED;
+#ifdef H5_HAVE_LIBHDFS
+    H5FD_hdfs_t   *file      = (H5FD_hdfs_t *)_file;
+    haddr_t        filesize  = 0;
+    haddr_t        offset    = 0;    /* file offset of the next chunk */
+    size_t         remaining = 0;    /* bytes still to be read */
+    unsigned char *dest      = NULL; /* where the next chunk is stored */
+#endif /* H5_HAVE_LIBHDFS */
+#if HDFS_STATS
+    /* working variables for storing stats */
+    hdfs_statsbin *bin   = NULL;
+    unsigned       bin_i = 0;
+#endif /* HDFS_STATS */
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#if HDFS_DEBUG
+    HDfprintf(stdout, "H5FD_hdfs_read() called.\n");
+#endif /* HDFS_DEBUG */
+
+#ifndef H5_HAVE_LIBHDFS
+    HGOTO_ERROR(H5E_VFL, H5E_UNSUPPORTED, FAIL,
+                "Illegal read of unsupported virtual file (hdfs)")
+#else
+    HDassert(file != NULL);
+    HDassert(file->hdfs_handle != NULL);
+    HDassert(file->hdfs_handle->magic == HDFS_HDFST_MAGIC);
+    HDassert(buf != NULL);
+
+    filesize = (haddr_t)file->hdfs_handle->fileinfo->mSize;
+
+    /* Range check written so that it cannot wrap around: compare `size`
+     * against the room left after `addr` instead of computing addr + size.
+     */
+    if ((addr > filesize) || ((haddr_t)size > (filesize - addr))) {
+        HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL,
+                    "range exceeds file address")
+    }
+
+    /* hdfsPread() takes a 32-bit signed length and may return fewer bytes
+     * than requested, so read in bounded chunks until the whole request
+     * has been satisfied.
+     */
+    offset    = addr;
+    remaining = size;
+    dest      = (unsigned char *)buf;
+    while (remaining > 0) {
+        tSize want = (remaining > (size_t)0x7fffffff)
+                   ? (tSize)0x7fffffff
+                   : (tSize)remaining;
+        tSize got  = hdfsPread(
+                file->hdfs_handle->filesystem,
+                file->hdfs_handle->file,
+                (tOffset)offset,
+                dest,
+                want);
+
+        if (got < 0) {
+            HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL,
+                        "unable to execute read")
+        }
+        if (got == 0) {
+            /* no progress inside a range already checked against the
+             * recorded file size: give up rather than loop forever
+             */
+            HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL,
+                        "unexpected end of file during read")
+        }
+
+        dest      += got;
+        offset    += (haddr_t)got;
+        remaining -= (size_t)got;
+    }
+
+#if HDFS_STATS
+
+    /* Find which "bin" this read fits in. Can be "overflow" bin.
+     */
+    for (bin_i = 0; bin_i < HDFS_STATS_BIN_COUNT; bin_i++) {
+        if ((unsigned long long)size < hdfs_stats_boundaries[bin_i]) {
+            break;
+        }
+    }
+    bin = (type == H5FD_MEM_DRAW)
+        ? &file->raw[bin_i]
+        : &file->meta[bin_i];
+
+    /* Store collected stats in appropriate bin
+     */
+    if (bin->count == 0) {
+        bin->min = size;
+        bin->max = size;
+    } else {
+        if (size < bin->min) bin->min = size;
+        if (size > bin->max) bin->max = size;
+    }
+    bin->count++;
+    bin->bytes += (unsigned long long)size;
+
+#endif /* HDFS_STATS */
+#endif /* H5_HAVE_LIBHDFS */
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* end H5FD_hdfs_read() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_hdfs_write()
+ *
+ * Purpose:
+ *
+ * Write bytes to file.
+ * UNSUPPORTED IN READ-ONLY HDFS VFD.
+ *
+ * Return:
+ *
+ * FAIL (Not possible with Read-Only HDFS file.)
+ *
+ * Programmer: Jacob Smith
+ * 2017-10-23
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_hdfs_write(
+        H5FD_t     H5_ATTR_UNUSED *_file,
+        H5FD_mem_t H5_ATTR_UNUSED  type,
+        hid_t      H5_ATTR_UNUSED  dxpl_id,
+        haddr_t    H5_ATTR_UNUSED  addr,
+        size_t     H5_ATTR_UNUSED  size,
+        const void H5_ATTR_UNUSED *buf)
+{
+    herr_t ret_value = FAIL;
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#if HDFS_DEBUG
+    HDfprintf(stdout, "H5FD_hdfs_write() called.\n");
+#endif
+
+    /* This driver is read-only: every write request is rejected. */
+    HGOTO_ERROR(H5E_VFL, H5E_UNSUPPORTED, FAIL,
+                "cannot write to read-only file.")
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_hdfs_write() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_hdfs_truncate()
+ *
+ * Purpose:
+ *
+ * Makes sure that the true file size is the same (or larger)
+ * than the end-of-address.
+ *
+ * NOT POSSIBLE ON READ-ONLY HDFS FILES.
+ *
+ * Return:
+ *
+ * FAIL (Not possible on Read-Only HDFS files.)
+ *
+ * Programmer: Jacob Smith
+ * 2017-10-23
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_hdfs_truncate(
+        H5FD_t  H5_ATTR_UNUSED *_file,
+        hid_t   H5_ATTR_UNUSED  dxpl_id,
+        hbool_t H5_ATTR_UNUSED  closing)
+{
+    herr_t ret_value = SUCCEED;
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#if HDFS_DEBUG
+    HDfprintf(stdout, "H5FD_hdfs_truncate() called.\n");
+#endif
+
+    /* This driver is read-only: truncation is always refused. */
+    HGOTO_ERROR(H5E_VFL, H5E_UNSUPPORTED, FAIL,
+                "cannot truncate read-only file.")
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* end H5FD_hdfs_truncate() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_hdfs_lock()
+ *
+ * Purpose:
+ *
+ * Place an advisory lock on a file.
+ * No effect on Read-Only HDFS file.
+ *
+ * Suggestion: remove lock/unlock from class
+ * > would result in error at H5FD_[un]lock() (H5FD.c)
+ *
+ * Return:
+ *
+ * SUCCEED (No-op always succeeds)
+ *
+ * Programmer: Jacob Smith
+ * 2017-11-03
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_hdfs_lock(
+        H5FD_t  H5_ATTR_UNUSED *_file,
+        hbool_t H5_ATTR_UNUSED  rw)
+{
+    /* locking is a no-op for this driver, so it always succeeds */
+    herr_t ret_value = SUCCEED;
+
+    FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* end H5FD_hdfs_lock() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_hdfs_unlock()
+ *
+ * Purpose:
+ *
+ * Remove the existing lock on the file.
+ * No effect on Read-Only HDFS file.
+ *
+ * Return:
+ *
+ * SUCCEED (No-op always succeeds)
+ *
+ * Programmer: Jacob Smith
+ * 2017-11-03
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_hdfs_unlock(H5FD_t H5_ATTR_UNUSED *_file)
+{
+    /* unlocking is a no-op for this driver, so it always succeeds */
+    herr_t ret_value = SUCCEED;
+
+    FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* end H5FD_hdfs_unlock() */
+
diff --git a/src/H5FDhdfs.h b/src/H5FDhdfs.h
new file mode 100644
index 0000000..3d4128d
--- /dev/null
+++ b/src/H5FDhdfs.h
@@ -0,0 +1,122 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Read-Only HDFS Virtual File Driver (VFD) *
+ * Copyright (c) 2018, The HDF Group. *
+ * *
+ * All rights reserved. *
+ * *
+ * NOTICE: *
+ * All information contained herein is, and remains, the property of The HDF *
+ * Group. The intellectual and technical concepts contained herein are *
+ * proprietary to The HDF Group. Dissemination of this information or *
+ * reproduction of this material is strictly forbidden unless prior written *
+ * permission is obtained from The HDF Group. *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/*
+ * Programmer: Jacob Smith
+ * 2018-04-23
+ *
+ * Purpose: The public header file for the hdfs driver.
+ */
+
+#ifndef H5FDhdfs_H
+#define H5FDhdfs_H
+
+#define H5FD_HDFS (H5FD_hdfs_init())
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/****************************************************************************
+ *
+ * Structure: H5FD_hdfs_fapl_t
+ *
+ * Purpose:
+ *
+ * H5FD_hdfs_fapl_t is a public structure that is used to pass
+ * configuration information to the appropriate HDFS VFD via the FAPL.
+ * A pointer to an instance of this structure is a parameter to
+ * H5Pset_fapl_hdfs() and H5Pget_fapl_hdfs().
+ *
+ *
+ *
+ * `version` (int32_t)
+ *
+ * Version number of the `H5FD_hdfs_fapl_t` structure. Any instance passed
+ * to the above calls must have a recognized version number, or an error
+ * will be flagged.
+ *
+ * This field should be set to `H5FD__CURR_HDFS_FAPL_T_VERSION`.
+ *
+ * `namenode_name` (const char[])
+ *
+ * Name of "Name Node" to access as the HDFS server.
+ *
+ * Must not be longer than `H5FD__HDFS_NODE_NAME_SPACE`.
+ *
+ * TBD: Can be NULL.
+ *
+ * `namenode_port` (int32_t) TBD
+ *
+ * Port number to use to connect with Name Node.
+ *
+ * TBD: If 0, uses a default port.
+ *
+ * `kerberos_ticket_cache` (const char[])
+ *
+ * Path to the location of the Kerberos authentication cache.
+ *
+ * Must not be longer than `H5FD__HDFS_KERB_CACHE_PATH_SPACE`.
+ *
+ * TBD: Can be NULL.
+ *
+ * `user_name` (const char[])
+ *
+ * Username to use when accessing file.
+ *
+ * Must not be longer than `H5FD__HDFS_USER_NAME_SPACE`.
+ *
+ * TBD: Can be NULL.
+ *
+ * `stream_buffer_size` (int32_t)
+ *
+ * Size (in bytes) of the file read stream buffer.
+ *
+ * TBD: If -1, relies on a default value.
+ *
+ *
+ *
+ * Programmer: Jacob Smith
+ * 2018-04-23
+ *
+ * Changes: None
+ *
+ ****************************************************************************/
+
+#define H5FD__CURR_HDFS_FAPL_T_VERSION 1
+
+#define H5FD__HDFS_NODE_NAME_SPACE 128
+#define H5FD__HDFS_USER_NAME_SPACE 128
+#define H5FD__HDFS_KERB_CACHE_PATH_SPACE 128
+
+/* Configuration passed to the HDFS VFD through the file access property
+ * list; see H5Pset_fapl_hdfs() and H5Pget_fapl_hdfs().
+ */
+typedef struct H5FD_hdfs_fapl_t {
+ /* structure version; should be H5FD__CURR_HDFS_FAPL_T_VERSION */
+ int32_t version;
+ /* name of the HDFS "Name Node"; room for H5FD__HDFS_NODE_NAME_SPACE
+  * characters plus the terminating NUL */
+ char namenode_name[H5FD__HDFS_NODE_NAME_SPACE + 1];
+ /* port number used to connect with the Name Node */
+ int32_t namenode_port;
+ /* user name used when accessing the file; room for
+  * H5FD__HDFS_USER_NAME_SPACE characters plus the terminating NUL */
+ char user_name[H5FD__HDFS_USER_NAME_SPACE + 1];
+ /* path to the Kerberos authentication cache; room for
+  * H5FD__HDFS_KERB_CACHE_PATH_SPACE characters plus the terminating NUL */
+ char kerberos_ticket_cache[H5FD__HDFS_KERB_CACHE_PATH_SPACE + 1];
+ /* size (in bytes) of the file read stream buffer */
+ int32_t stream_buffer_size;
+} H5FD_hdfs_fapl_t;
+
+H5_DLL hid_t H5FD_hdfs_init(void);
+H5_DLL herr_t H5Pget_fapl_hdfs(hid_t fapl_id, H5FD_hdfs_fapl_t *fa_out);
+H5_DLL herr_t H5Pset_fapl_hdfs(hid_t fapl_id, H5FD_hdfs_fapl_t *fa);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ifndef H5FDhdfs_H */
+
+
diff --git a/src/H5FDros3.c b/src/H5FDros3.c
new file mode 100644
index 0000000..8bf0420
--- /dev/null
+++ b/src/H5FDros3.c
@@ -0,0 +1,1847 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Read-Only S3 Virtual File Driver (VFD) *
+ * Copyright (c) 2017-2018, The HDF Group. *
+ * *
+ * All rights reserved. *
+ * *
+ * NOTICE: *
+ * All information contained herein is, and remains, the property of The HDF *
+ * Group. The intellectual and technical concepts contained herein are *
+ * proprietary to The HDF Group. Dissemination of this information or *
+ * reproduction of this material is strictly forbidden unless prior written *
+ * permission is obtained from The HDF Group. *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/*
+ * Programmer: Jacob Smith <jake.smith@hdfgroup.org>
+ * 2017-10-13
+ *
+ * Purpose:
+ *
+ * Provide read-only access to files hosted on Amazon's S3 service.
+ * Relies on "s3comms" utility layer to implement the AWS REST API.
+ */
+
+/* This source code file is part of the H5FD driver module */
+#include "H5FDdrvr_module.h"
+
+#include "H5private.h" /* Generic Functions */
+#include "H5Eprivate.h" /* Error handling */
+#include "H5FDprivate.h" /* File drivers */
+#include "H5FDros3.h" /* ros3 file driver */
+#include "H5FLprivate.h" /* Free Lists */
+#include "H5Iprivate.h" /* IDs */
+#include "H5MMprivate.h" /* Memory management */
+#include "H5FDs3comms.h" /* S3 Communications */
+
+/* Compile-time toggle for function-call tracing: when set to 1, the
+ * `#if ROS3_DEBUG` sections in this file print a line each time the
+ * corresponding function is entered.
+ */
+#define ROS3_DEBUG 0
+
+/* Compile-time toggle for read-statistics collection and reporting.
+ * When set to 1, the `#if ROS3_STATS` sections in this file (stats bins,
+ * boundary table, reset and print routines) are compiled in.
+ */
+#define ROS3_STATS 0
+
+/* The driver identification number, initialized at runtime by
+ * H5FD_ros3_init() and reset to 0 by H5FD_ros3_term().
+ * NOTE: distinct from `H5FD_ros3_g` (lower-case "ros3"), which is the
+ * driver class table defined further down.
+ */
+static hid_t H5FD_ROS3_g = 0;
+
+#if ROS3_STATS
+
+/* arbitrarily large value, such that any reasonable size read will be "less"
+ * than this value and set a true minimum
+ * not 0 because that may be a valid recorded minimum in degenerate cases
+ */
+#define ROS3_STATS_STARTING_MIN 0xfffffffful
+
+/* Configuration definitions for stats collection and breakdown
+ *
+ * 2^10 = 1024
+ * Reads up to 1024 bytes (1 kB) fall in bin 0
+ * 2^(10+(1*16)) = 2^26 = 64MB
+ * Reads of 64MB or greater fall in "overflow" bin[BIN_COUNT]
+ */
+#define ROS3_STATS_BASE 2
+#define ROS3_STATS_INTERVAL 1
+#define ROS3_STATS_START_POWER 10
+#define ROS3_STATS_BIN_COUNT 16 /* MUST BE GREATER THAN 0 */
+
+
+/*
+ * Calculate `BASE ^ (START_POWER + (INTERVAL * bin_i))`
+ * Stores result at `(unsigned long long *) out_ptr`.
+ * Used in computing boundaries between stats bins.
+ */
+/* Implementation notes:
+ * - Expands to a bare `{ ... }` block (not `do { } while (0)`), so it is
+ *   invoked WITHOUT a trailing semicolon requirement; do not use it as the
+ *   unbraced body of an `if` that has an `else`.
+ * - `bin_i` is re-evaluated on every loop iteration; pass an expression
+ *   without side effects.
+ * - The result is built by repeated multiplication; nothing here guards
+ *   against overflow of `unsigned long long` for large exponents.
+ * - The `donotshadow*` locals are deliberately oddly named, presumably to
+ *   avoid clashing with identifiers at the expansion site.
+ */
+#define ROS3_STATS_POW(bin_i, out_ptr) { \
+ unsigned long long donotshadowresult = 1; \
+ unsigned donotshadowindex = 0; \
+ for (donotshadowindex = 0; \
+ donotshadowindex < (((bin_i) * ROS3_STATS_INTERVAL) + \
+ ROS3_STATS_START_POWER); \
+ donotshadowindex++) \
+ { \
+ donotshadowresult *= ROS3_STATS_BASE; \
+ } \
+ *(out_ptr) = donotshadowresult; \
+}
+
+/* array to hold pre-computed boundaries for stats bins
+ */
+static unsigned long long ros3_stats_boundaries[ROS3_STATS_BIN_COUNT];
+
+/***************************************************************************
+ *
+ * Structure: ros3_statsbin
+ *
+ * Purpose:
+ *
+ * Structure for storing per-file ros3 VFD usage statistics.
+ *
+ *
+ *
+ * `count` (unsigned long long)
+ *
+ * Number of reads with size in this bin's range.
+ *
+ * `bytes` (unsigned long long)
+ *
+ * Total number of bytes read through this bin.
+ *
+ * `min` (unsigned long long)
+ *
+ * Smallest read size in this bin.
+ *
+ * `max` (unsigned long long)
+ *
+ * Largest read size in this bin.
+ *
+ *
+ *
+ * Programmer: Jacob Smith
+ *
+ * Changes: None
+ *
+ ***************************************************************************/
+/* One histogram bin of read statistics (one per size range, per data kind). */
+typedef struct {
+ /* number of reads recorded in this bin */
+ unsigned long long count;
+ /* total number of bytes read through this bin */
+ unsigned long long bytes;
+ /* smallest read size recorded; starts at ROS3_STATS_STARTING_MIN */
+ unsigned long long min;
+ /* largest read size recorded; starts at 0 */
+ unsigned long long max;
+} ros3_statsbin;
+
+#endif /* ROS3_STATS */
+
+/***************************************************************************
+ *
+ * Structure: H5FD_ros3_t
+ *
+ * Purpose:
+ *
+ * H5FD_ros3_t is a structure used to store all information needed to
+ * maintain R/O access to a single HDF5 file that has been stored as a
+ * S3 object. This structure is created when such a file is "opened" and
+ * discarded when it is "closed".
+ *
+ * Presents an S3 object as a file to the HDF5 library.
+ *
+ *
+ *
+ * `pub` (H5FD_t)
+ *
+ * Instance of H5FD_t which contains all fields common to all VFDs.
+ * It must be the first item in this structure, since at higher levels,
+ * this structure will be treated as an instance of H5FD_t.
+ *
+ * `fa` (H5FD_ros3_fapl_t)
+ *
+ * Instance of `H5FD_ros3_fapl_t` containing the S3 configuration data
+ * needed to "open" the HDF5 file.
+ *
+ * `eoa` (haddr_t)
+ *
+ * End of addressed space in file. After open, it should always
+ * equal the file size.
+ *
+ * `s3r_handle` (s3r_t *)
+ *
+ * Instance of S3 Request handle associated with the target resource.
+ * Responsible for communicating with remote host and presenting file
+ * contents as indistinguishable from a file on the local filesystem.
+ *
+ * *** present only if ROS3_STATS is flagged to enable stats collection ***
+ *
+ * `meta` (ros3_statsbin[])
+ * `raw` (ros3_statsbin[])
+ *
+ * Only present if ros3 stats collection is enabled.
+ *
+ * Arrays of `ros3_statsbin` structures to record raw- and metadata reads.
+ *
+ * Records count and size of reads performed by the VFD, and is used to
+ * print formatted usage statistics to stdout upon VFD shutdown.
+ *
+ * Reads of each raw- and metadata type are recorded in an individual bin
+ * determined by the size of the read. The last bin of each type is
+ * reserved for "big" reads, with no defined upper bound.
+ *
+ * *** end ROS3_STATS ***
+ *
+ *
+ *
+ * Programmer: Jacob Smith
+ *
+ * Changes: None.
+ *
+ ***************************************************************************/
+/* Per-file state of the ros3 driver; allocated in H5FD_ros3_open(). */
+typedef struct H5FD_ros3_t {
+ /* common VFD fields; must stay first so the struct can be used as H5FD_t */
+ H5FD_t pub;
+ /* copy of the S3 configuration the file was opened with */
+ H5FD_ros3_fapl_t fa;
+ /* end of addressed space in the file */
+ haddr_t eoa;
+ /* S3 request handle for the target object */
+ s3r_t *s3r_handle;
+#if ROS3_STATS
+ /* read statistics: ROS3_STATS_BIN_COUNT sized bins plus one final
+  * "overflow" bin at index ROS3_STATS_BIN_COUNT
+  */
+ ros3_statsbin meta[ROS3_STATS_BIN_COUNT + 1];
+ ros3_statsbin raw[ROS3_STATS_BIN_COUNT + 1];
+#endif
+} H5FD_ros3_t;
+
+/*
+ * These macros check for overflow of various quantities. These macros
+ * assume that HDoff_t is signed and haddr_t and size_t are unsigned.
+ *
+ * ADDR_OVERFLOW: Checks whether a file address of type `haddr_t'
+ * is too large to be represented by the second argument
+ * of the file seek function.
+ *
+ */
+/* MAXADDR: all bits set except the top bit of an HDoff_t-sized value,
+ * i.e. the largest non-negative value of a signed HDoff_t, as haddr_t.
+ */
+#define MAXADDR (((haddr_t)1<<(8*sizeof(HDoff_t)-1))-1)
+/* ADDR_OVERFLOW(A): non-zero when A is HADDR_UNDEF or has any bit set
+ * outside the MAXADDR mask. Note that A is evaluated twice.
+ */
+#define ADDR_OVERFLOW(A) (HADDR_UNDEF==(A) || ((A) & ~(haddr_t)MAXADDR))
+
+/* Prototypes */
+static herr_t H5FD_ros3_term(void);
+static void *H5FD_ros3_fapl_get(H5FD_t *_file);
+static void *H5FD_ros3_fapl_copy(const void *_old_fa);
+static herr_t H5FD_ros3_fapl_free(void *_fa);
+static H5FD_t *H5FD_ros3_open(const char *name, unsigned flags, hid_t fapl_id,
+ haddr_t maxaddr);
+static herr_t H5FD_ros3_close(H5FD_t *_file);
+static int H5FD_ros3_cmp(const H5FD_t *_f1, const H5FD_t *_f2);
+static herr_t H5FD_ros3_query(const H5FD_t *_f1, unsigned long *flags);
+static haddr_t H5FD_ros3_get_eoa(const H5FD_t *_file, H5FD_mem_t type);
+static herr_t H5FD_ros3_set_eoa(H5FD_t *_file, H5FD_mem_t type, haddr_t addr);
+static haddr_t H5FD_ros3_get_eof(const H5FD_t *_file, H5FD_mem_t type);
+static herr_t H5FD_ros3_get_handle(H5FD_t *_file, hid_t fapl,
+ void** file_handle);
+static herr_t H5FD_ros3_read(H5FD_t *_file, H5FD_mem_t type, hid_t fapl_id,
+ haddr_t addr, size_t size, void *buf);
+static herr_t H5FD_ros3_write(H5FD_t *_file, H5FD_mem_t type, hid_t fapl_id,
+ haddr_t addr, size_t size, const void *buf);
+static herr_t H5FD_ros3_truncate(H5FD_t *_file, hid_t dxpl_id,
+ hbool_t closing);
+static herr_t H5FD_ros3_lock(H5FD_t *_file, hbool_t rw);
+static herr_t H5FD_ros3_unlock(H5FD_t *_file);
+static herr_t H5FD_ros3_validate_config(const H5FD_ros3_fapl_t * fa);
+
+/* Driver class table for the ros3 VFD; passed to H5FD_register() by
+ * H5FD_ros3_init(). The initializer is positional, so entries must stay in
+ * the order of the H5FD_class_t members named in the trailing comments.
+ * Callbacks left NULL (superblock hooks, dxpl hooks, get_type_map, alloc,
+ * free, flush) are not provided by this driver.
+ * NOTE: distinct from `H5FD_ROS3_g` (upper-case "ROS3"), the driver ID.
+ */
+static const H5FD_class_t H5FD_ros3_g = {
+ "ros3", /* name */
+ MAXADDR, /* maxaddr */
+ H5F_CLOSE_WEAK, /* fc_degree */
+ H5FD_ros3_term, /* terminate */
+ NULL, /* sb_size */
+ NULL, /* sb_encode */
+ NULL, /* sb_decode */
+ sizeof(H5FD_ros3_fapl_t), /* fapl_size */
+ H5FD_ros3_fapl_get, /* fapl_get */
+ H5FD_ros3_fapl_copy, /* fapl_copy */
+ H5FD_ros3_fapl_free, /* fapl_free */
+ 0, /* dxpl_size */
+ NULL, /* dxpl_copy */
+ NULL, /* dxpl_free */
+ H5FD_ros3_open, /* open */
+ H5FD_ros3_close, /* close */
+ H5FD_ros3_cmp, /* cmp */
+ H5FD_ros3_query, /* query */
+ NULL, /* get_type_map */
+ NULL, /* alloc */
+ NULL, /* free */
+ H5FD_ros3_get_eoa, /* get_eoa */
+ H5FD_ros3_set_eoa, /* set_eoa */
+ H5FD_ros3_get_eof, /* get_eof */
+ H5FD_ros3_get_handle, /* get_handle */
+ H5FD_ros3_read, /* read */
+ H5FD_ros3_write, /* write */
+ NULL, /* flush */
+ H5FD_ros3_truncate, /* truncate */
+ H5FD_ros3_lock, /* lock */
+ H5FD_ros3_unlock, /* unlock */
+ H5FD_FLMAP_DICHOTOMY /* fl_map */
+};
+
+/* Declare a free list to manage the H5FD_ros3_t struct */
+H5FL_DEFINE_STATIC(H5FD_ros3_t);
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD__init_package
+ *
+ * Purpose: Initializes any interface-specific data or routines.
+ *
+ * Return: Non-negative on success/Negative on failure
+ *
+ * Changes: Rename as appropriate for ros3 vfd.
+ * Jacob Smith 2017
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD__init_package(void)
+{
+    herr_t ret_value = SUCCEED; /* Return value */
+    hid_t  driver_id;           /* ID handed back by the driver init */
+
+    FUNC_ENTER_STATIC
+
+    /* Package initialization consists solely of registering the driver */
+    driver_id = H5FD_ros3_init();
+    if (driver_id < 0)
+        HGOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "unable to initialize ros3 VFD")
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD__init_package() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_ros3_init
+ *
+ * Purpose: Initialize this driver by registering the driver with the
+ * library.
+ *
+ * Return: Success: The driver ID for the ros3 driver.
+ * Failure: Negative
+ *
+ * Programmer: Robb Matzke
+ * Thursday, July 29, 1999
+ *
+ * Changes: Rename as appropriate for ros3 vfd.
+ * Jacob Smith 2017
+ *
+ *-------------------------------------------------------------------------
+ */
+hid_t
+H5FD_ros3_init(void)
+{
+    hid_t ret_value = H5I_INVALID_HID; /* Return value */
+
+    FUNC_ENTER_NOAPI(FAIL)
+
+#if ROS3_DEBUG
+    HDfprintf(stdout, "H5FD_ros3_init() called.\n");
+#endif
+
+    /* Register the class table unless the stored ID is already a VFL ID */
+    if (H5I_get_type(H5FD_ROS3_g) != H5I_VFL)
+        H5FD_ROS3_g = H5FD_register(&H5FD_ros3_g, sizeof(H5FD_class_t), FALSE);
+
+#if ROS3_STATS
+    {
+        unsigned bin_i;
+
+        /* Fill the table of stats-bin upper boundaries, one power per bin */
+        for (bin_i = 0; bin_i < ROS3_STATS_BIN_COUNT; bin_i++) {
+            unsigned long long boundary = 0;
+
+            ROS3_STATS_POW(bin_i, &boundary)
+            ros3_stats_boundaries[bin_i] = boundary;
+        }
+    }
+#endif
+
+    /* Hand back whatever ID is now stored (negative if registration failed) */
+    ret_value = H5FD_ROS3_g;
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* end H5FD_ros3_init() */
+
+
+/*---------------------------------------------------------------------------
+ * Function: H5FD_ros3_term
+ *
+ * Purpose: Shut down the VFD
+ *
+ * Returns: SUCCEED (Can't fail)
+ *
+ * Programmer: Quincey Koziol
+ * Friday, Jan 30, 2004
+ *
+ * Changes: Rename as appropriate for ros3 vfd.
+ * Jacob Smith 2017
+ *
+ *---------------------------------------------------------------------------
+ */
+/* Driver "terminate" callback (see the `terminate` slot of the class
+ * table). Only clears the cached driver ID and always reports SUCCEED.
+ */
+static herr_t
+H5FD_ros3_term(void)
+{
+ FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+#if ROS3_DEBUG
+ HDfprintf(stdout, "H5FD_ros3_term() called.\n");
+#endif
+
+ /* Reset VFL ID; H5FD_ros3_init() registers the driver again whenever
+  * the stored ID is not of type H5I_VFL.
+  */
+ H5FD_ROS3_g = 0;
+
+ FUNC_LEAVE_NOAPI(SUCCEED)
+} /* end H5FD_ros3_term() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5Pset_fapl_ros3
+ *
+ * Purpose: Modify the file access property list to use the H5FD_ROS3
+ * driver defined in this source file. All driver specific
+ * properties are passed in as a pointer to a suitably
+ * initialized instance of H5FD_ros3_fapl_t
+ *
+ * Return: SUCCEED/FAIL
+ *
+ * Programmer: John Mainzer
+ * 9/10/17
+ *
+ *-------------------------------------------------------------------------
+ */
+/* Set the ros3 driver, with configuration `fa`, on file access property
+ * list `fapl_id`.
+ *
+ * fapl_id: ID of a file access property list.
+ * fa:      ros3 configuration; must be non-NULL and pass
+ *          H5FD_ros3_validate_config().
+ *
+ * Returns the result of H5P_set_driver() on success; FAIL if `fa` is NULL,
+ * `fapl_id` is not a file access property list, or `fa` is rejected by
+ * validation.
+ */
+herr_t
+H5Pset_fapl_ros3(hid_t fapl_id,
+                 H5FD_ros3_fapl_t *fa)
+{
+    H5P_genplist_t *plist = NULL; /* Property list pointer */
+    herr_t ret_value = FAIL;
+
+    FUNC_ENTER_API(FAIL)
+    H5TRACE2("e", "i*x", fapl_id, fa);
+
+#if ROS3_DEBUG
+    HDfprintf(stdout, "H5Pset_fapl_ros3() called.\n");
+#endif
+
+    /* `fa` comes straight from the application: report NULL as an error
+     * rather than asserting, since asserts vanish in production builds and
+     * the pointer is dereferenced during validation below.
+     */
+    if (fa == NULL)
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "fa is NULL")
+
+    plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS);
+    if (plist == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, \
+                    "not a file access property list")
+    }
+
+    if (FAIL == H5FD_ros3_validate_config(fa))
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid ros3 config")
+
+    ret_value = H5P_set_driver(plist, H5FD_ROS3, (void *)fa);
+
+done:
+
+    FUNC_LEAVE_API(ret_value)
+
+} /* H5Pset_fapl_ros3() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_ros3_validate_config()
+ *
+ * Purpose: Test to see if the supplied instance of H5FD_ros3_fapl_t
+ * contains internally consistent data. Return SUCCEED if so,
+ * and FAIL otherwise.
+ *
+ * Note the difference between internally consistent and
+ * correct. As we will have to try to access the target
+ * object to determine whether the supplied data is correct,
+ * we will settle for internal consistency at this point.
+ *
+ * Return: SUCCEED if instance of H5FD_ros3_fapl_t contains internally
+ * consistent data, FAIL otherwise.
+ *
+ * Programmer: Jacob Smith
+ * 9/10/17
+ *
+ * Changes: Add checks for authenticate flag requiring populated
+ * `aws_region` and `secret_id` strings.
+ * -- Jacob Smith 2017-11-01
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_ros3_validate_config(const H5FD_ros3_fapl_t * fa)
+{
+    herr_t ret_value = SUCCEED; /* Return value */
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+    HDassert(fa != NULL);
+
+    /* Only the current structure version is understood */
+    if (H5FD__CURR_ROS3_FAPL_T_VERSION != fa->version)
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "Unknown H5FD_ros3_fapl_t version")
+
+    /* Authentication needs both a region and an access id to be present */
+    if (TRUE == fa->authenticate &&
+        ('\0' == fa->aws_region[0] || '\0' == fa->secret_id[0]))
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "Inconsistent authentication information")
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_ros3_validate_config() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5Pget_fapl_ros3
+ *
+ * Purpose: Returns information about the ros3 file access property
+ * list through the function arguments.
+ *
+ * Return: Success: Non-negative
+ *
+ * Failure: Negative
+ *
+ * Programmer: John Mainzer
+ * 9/10/17
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+herr_t
+H5Pget_fapl_ros3(hid_t fapl_id,
+                 H5FD_ros3_fapl_t *fa_out)
+{
+    const H5FD_ros3_fapl_t *stored = NULL;    /* Driver info kept on the plist */
+    H5P_genplist_t         *plist  = NULL;    /* Property list pointer */
+    herr_t                  ret_value = SUCCEED; /* Return value */
+
+    FUNC_ENTER_API(FAIL)
+    H5TRACE2("e", "i*x", fapl_id, fa_out);
+
+#if ROS3_DEBUG
+    HDfprintf(stdout, "H5Pget_fapl_ros3() called.\n");
+#endif
+
+    /* Caller must supply somewhere to put the result */
+    if (NULL == fa_out)
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "fa_out is NULL")
+
+    /* Resolve the ID to a file access property list */
+    if (NULL == (plist = H5P_object_verify(fapl_id, H5P_FILE_ACCESS)))
+        HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access list")
+
+    /* The list must be configured for this driver */
+    if (H5FD_ROS3 != H5P_peek_driver(plist))
+        HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "incorrect VFL driver")
+
+    /* Look at (without taking ownership of) the stored driver info */
+    stored = (const H5FD_ros3_fapl_t *)H5P_peek_driver_info(plist);
+    if (NULL == stored)
+        HGOTO_ERROR(H5E_PLIST, H5E_BADVALUE, FAIL, "bad VFL driver info")
+
+    /* Hand a byte-for-byte copy back to the caller */
+    HDmemcpy(fa_out, stored, sizeof(H5FD_ros3_fapl_t));
+
+done:
+    FUNC_LEAVE_API(ret_value)
+
+} /* H5Pget_fapl_ros3() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_ros3_fapl_get
+ *
+ * Purpose: Gets a file access property list which could be used to
+ * create an identical file.
+ *
+ * Return: Success: Ptr to new file access property list value.
+ *
+ * Failure: NULL
+ *
+ * Programmer: John Mainzer
+ * 9/8/17
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static void *
+H5FD_ros3_fapl_get(H5FD_t *_file)
+{
+    H5FD_ros3_t      *file = (H5FD_ros3_t *)_file;
+    H5FD_ros3_fapl_t *copy = NULL;      /* Newly allocated duplicate */
+    void             *ret_value = NULL; /* Return value */
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+    /* Zero-filled storage for the duplicate */
+    if (NULL == (copy = (H5FD_ros3_fapl_t *)H5MM_calloc(sizeof(H5FD_ros3_fapl_t))))
+        HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL,
+                    "memory allocation failed")
+
+    /* Duplicate the configuration the file was opened with */
+    HDmemcpy(copy, &(file->fa), sizeof(H5FD_ros3_fapl_t));
+
+    ret_value = copy;
+
+done:
+    /* On failure, do not leak a partially prepared duplicate */
+    if (NULL == ret_value && NULL != copy)
+        H5MM_xfree(copy);
+
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_ros3_fapl_get() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_ros3_fapl_copy
+ *
+ * Purpose: Copies the ros3-specific file access properties.
+ *
+ * Return: Success: Ptr to a new property list
+ *
+ * Failure: NULL
+ *
+ * Programmer: John Mainzer
+ * 9/8/17
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static void *
+H5FD_ros3_fapl_copy(const void *_old_fa)
+{
+    const H5FD_ros3_fapl_t *src = (const H5FD_ros3_fapl_t *)_old_fa;
+    H5FD_ros3_fapl_t       *dup = NULL;       /* Newly allocated duplicate */
+    void                   *ret_value = NULL; /* Return value */
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+    /* Storage for the duplicate; fully overwritten by the copy below */
+    if (NULL == (dup = (H5FD_ros3_fapl_t *)H5MM_malloc(sizeof(H5FD_ros3_fapl_t))))
+        HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL,
+                    "memory allocation failed")
+
+    HDmemcpy(dup, src, sizeof(H5FD_ros3_fapl_t));
+    ret_value = dup;
+
+done:
+    /* On failure, release anything that was allocated */
+    if (NULL == ret_value && NULL != dup)
+        H5MM_xfree(dup);
+
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_ros3_fapl_copy() */
+
+
+/*-------------------------------------------------------------------------
+ * Function: H5FD_ros3_fapl_free
+ *
+ * Purpose: Frees the ros3-specific file access properties.
+ *
+ * Return: SUCCEED (cannot fail)
+ *
+ * Programmer: John Mainzer
+ * 9/8/17
+ *
+ * Modifications:
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_ros3_fapl_free(void *_fa)
+{
+    FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+    /* A NULL pointer here indicates a caller bug */
+    HDassert(_fa != NULL);
+
+    /* The structure owns no further allocations; release it in one go */
+    H5MM_xfree(_fa);
+
+    FUNC_LEAVE_NOAPI(SUCCEED)
+
+} /* H5FD_ros3_fapl_free() */
+
+#if ROS3_STATS
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: ros3_reset_stats()
+ *
+ * Purpose:
+ *
+ * Reset the stats collection elements in this virtual file structure.
+ *
+ * Clears any set data in stats bins; initializes/zeroes values.
+ *
+ * Return:
+ *
+ * - SUCCESS: `SUCCEED`
+ * - FAILURE: `FAIL`
+ * - Occurs if the file is invalid somehow
+ *
+ * Programmer: Jacob Smith
+ * 2017-12-08
+ *
+ * Changes: None.
+ *
+ *----------------------------------------------------------------------------
+ */
+static herr_t
+ros3_reset_stats(H5FD_ros3_t *file)
+{
+    unsigned bin = 0;
+    herr_t   ret_value = SUCCEED; /* Return value */
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#if ROS3_DEBUG
+    HDprintf("ros3_reset_stats() called\n");
+#endif
+
+    if (NULL == file)
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "file was null")
+
+    /* Walk every bin, including the trailing overflow bin */
+    for (bin = 0; bin < ROS3_STATS_BIN_COUNT + 1; bin++) {
+        ros3_statsbin *raw_bin  = &file->raw[bin];
+        ros3_statsbin *meta_bin = &file->meta[bin];
+
+        raw_bin->bytes = 0;
+        raw_bin->count = 0;
+        raw_bin->min   = (unsigned long long)ROS3_STATS_STARTING_MIN;
+        raw_bin->max   = 0;
+
+        meta_bin->bytes = 0;
+        meta_bin->count = 0;
+        meta_bin->min   = (unsigned long long)ROS3_STATS_STARTING_MIN;
+        meta_bin->max   = 0;
+    }
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value);
+
+} /* ros3_reset_stats */
+#endif /* ROS3_STATS */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_ros3_open()
+ *
+ * Purpose:
+ *
+ * Opens an existing S3 object, read-only, as an HDF5 file.
+ *
+ * Any flag except H5F_ACC_RDONLY will cause an error.
+ *
+ * Name (as received from `H5FD_open()`) must conform to web url:
+ * NAME :: HTTP "://" DOMAIN [PORT] ["/" [URI] [QUERY] ]
+ * HTTP :: "http" [ "s" ]
+ * DOMAIN :: e.g., "mybucket.host.org"
+ * PORT :: ":" <number> (e.g., ":9000" )
+ * URI :: <string> (e.g., "path/to/resource.hd5" )
+ * QUERY :: "?" <string> (e.g., "arg1=param1&arg2=param2")
+ *
+ * Return:
+ *
+ * Success: A pointer to a new file data structure.
+ * The public fields will be initialized by the caller, which is
+ * always H5FD_open().
+ *
+ * Failure: NULL
+ *
+ * Programmer: Jacob Smith
+ * 2017-11-02
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+/* Driver "open" callback: open the S3 object at `url` for read-only access.
+ *
+ * url:     web URL of the object; must be a non-empty string.
+ * flags:   must be exactly H5F_ACC_RDONLY.
+ * fapl_id: file access property list configured for the ros3 driver.
+ * maxaddr: must be non-zero, not HADDR_UNDEF, and must not overflow
+ *          (see ADDR_OVERFLOW).
+ *
+ * Returns a new H5FD_ros3_t (as H5FD_t *) on success, NULL on failure.
+ * When the library is built without H5_HAVE_ROS3_VFD this always returns
+ * NULL.
+ */
+static H5FD_t *
+H5FD_ros3_open(const char *url,
+               unsigned flags,
+               hid_t fapl_id,
+               haddr_t maxaddr)
+{
+#ifdef H5_HAVE_ROS3_VFD
+    H5FD_ros3_t *file = NULL;
+    struct tm *now = NULL;
+    char iso8601now[ISO8601_SIZE];
+    unsigned char signing_key[SHA256_DIGEST_LENGTH];
+    s3r_t *handle = NULL;
+    H5FD_ros3_fapl_t fa;
+    hbool_t curl_ready = FALSE; /* TRUE once curl_global_init() succeeded */
+#endif
+    H5FD_t *ret_value = NULL;
+
+    FUNC_ENTER_NOAPI_NOINIT
+#ifdef H5_HAVE_ROS3_VFD
+
+#if ROS3_DEBUG
+    HDfprintf(stdout, "H5FD_ros3_open() called.\n");
+#endif
+
+    /* Sanity check on file offsets */
+    HDcompile_assert(sizeof(HDoff_t) >= sizeof(size_t));
+
+    /* Check arguments */
+    if (!url || !*url)
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, "invalid file name")
+    if (0 == maxaddr || HADDR_UNDEF == maxaddr)
+        HGOTO_ERROR(H5E_ARGS, H5E_BADRANGE, NULL, "bogus maxaddr")
+    if (ADDR_OVERFLOW(maxaddr))
+        HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, NULL, "bogus maxaddr")
+    if (flags != H5F_ACC_RDONLY)
+        HGOTO_ERROR(H5E_ARGS, H5E_UNSUPPORTED, NULL,
+                    "only Read-Only access allowed")
+
+    if (FAIL == H5Pget_fapl_ros3(fapl_id, &fa)) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL, "can't get property list")
+    }
+
+    if (CURLE_OK != curl_global_init(CURL_GLOBAL_DEFAULT)) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                    "unable to initialize curl global (placeholder flags)")
+    }
+    /* From here on a failure must be paired with curl_global_cleanup();
+     * failures before this point must NOT call it, or the global init count
+     * held on behalf of other open files would be decremented.
+     */
+    curl_ready = TRUE;
+
+    /* open file; procedure depends on whether or not the fapl instructs to
+     * authenticate requests or not.
+     */
+    if (fa.authenticate == TRUE) {
+        /* compute signing key (part of AWS/S3 REST API)
+         * can be re-used by user/key for 7 days after creation.
+         * find way to re-use/share
+         */
+        now = gmnow();
+        if (now == NULL) {
+            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                        "unable to obtain current time")
+        }
+        if (ISO8601NOW(iso8601now, now) != (ISO8601_SIZE - 1)) {
+            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                        "problem while writing iso8601 timestamp")
+        }
+        if (FAIL == H5FD_s3comms_signing_key(signing_key,
+                                             (const char *)fa.secret_key,
+                                             (const char *)fa.aws_region,
+                                             (const char *)iso8601now) )
+        {
+            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                        "problem while computing signing key")
+        }
+
+        handle = H5FD_s3comms_s3r_open(
+                 url,
+                 (const char *)fa.aws_region,
+                 (const char *)fa.secret_id,
+                 (const unsigned char *)signing_key);
+    } else {
+        handle = H5FD_s3comms_s3r_open(url, NULL, NULL, NULL);
+    } /* if/else should authenticate */
+
+    if (handle == NULL) {
+        /* If we want to check CURL's say on the matter in a controlled
+         * fashion, this is the place to do it, but would need to make a
+         * few minor changes to s3comms `s3r_t` and `s3r_read()`.
+         */
+        HGOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, NULL, "could not open");
+    }
+
+    /* create new file struct
+     */
+    file = H5FL_CALLOC(H5FD_ros3_t);
+    if (file == NULL) {
+        HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, NULL,
+                    "unable to allocate file struct")
+    }
+
+    file->s3r_handle = handle;
+    HDmemcpy(&(file->fa), &fa, sizeof(H5FD_ros3_fapl_t));
+
+#if ROS3_STATS
+    if (FAIL == ros3_reset_stats(file)) {
+        HGOTO_ERROR(H5E_INTERNAL, H5E_UNINITIALIZED, NULL,
+                    "unable to reset file statistics")
+    }
+#endif /* ROS3_STATS */
+
+    ret_value = (H5FD_t*)file;
+
+done:
+    if (ret_value == NULL) {
+        if (handle != NULL) {
+            if (FAIL == H5FD_s3comms_s3r_close(handle)) {
+                HDONE_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, NULL,
+                            "unable to close s3 file handle")
+            }
+        }
+        if (file != NULL) {
+            file = H5FL_FREE(H5FD_ros3_t, file);
+        }
+        /* early cleanup because open failed -- but only to undo an
+         * initialization this call actually performed
+         */
+        if (curl_ready) {
+            curl_global_cleanup();
+        }
+    } /* if null return value (error) */
+#endif /* H5_HAVE_ROS3_VFD */
+
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_ros3_open() */
+
+#if ROS3_STATS
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: ros3_fprint_stats()
+ *
+ * Purpose:
+ *
+ * Tabulate and pretty-print statistics for this virtual file.
+ *
+ * Should be called upon file close.
+ *
+ * Shows number of reads and bytes read, broken down by
+ * "raw" (H5FD_MEM_DRAW)
+ * or "meta" (any other flag)
+ *
+ * Prints filename and listing of total number of reads and bytes read,
+ * both as a grand total and separate meta- and rawdata reads.
+ *
+ * If any reads were done, prints out two tables:
+ *
+ * 1. overview of raw- and metadata reads
+ * - min (smallest size read)
+ * - average of size read
+ * - k,M,G suffixes by powers of 1024 (2^10)
+ * - max (largest size read)
+ * 2. tabulation of "bins", separating reads into exponentially-larger
+ * ranges of size.
+ * - columns for number of reads, total bytes, and average size, with
+ * separate sub-columns for raw- and metadata reads.
+ * - each row represents one bin, identified by the top of its range
+ *
+ * Bin ranges can be modified with pound-defines at the top of this file.
+ *
+ * Bins without any reads in their bounds are not printed.
+ *
+ * An "overflow" bin is also present, to catch "big" reads.
+ *
+ * Output for all bins (and range ceiling and average size report)
+ * is divided by powers of 1024. As a consequence, up to four digits before
+ * the decimal point are valid.
+ *
+ * - 41080 bytes is represented by 40.177k, not 41.080k
+ * - 1004.831M represents approx. 1052642000 bytes
+ *
+ * Return:
+ *
+ * - SUCCESS: `SUCCEED`
+ * - FAILURE: `FAIL`
+ * - occurs if the file passed in is invalid
+ * - TODO: if stream is invalid? how can we check this?
+ *
+ * Programmer: Jacob Smith
+ *
+ * Changes: None.
+ *
+ *----------------------------------------------------------------------------
+ */
+/* Scale `*value` down by factors of 1024 until it is below 1024 (or the
+ * largest known suffix is reached) and return the matching size suffix:
+ * ' ' (none), 'K', 'M', 'G', 'T' or 'P'.
+ */
+static char
+ros3_stats_scale(double *value)
+{
+    static const char suffixes[] = { ' ', 'K', 'M', 'G', 'T', 'P' };
+    unsigned          suffix_i   = 0;
+
+    /* Stop at the last suffix so the table is never indexed out of range */
+    while (*value >= 1024.0 && (suffix_i + 1) < sizeof(suffixes)) {
+        *value /= 1024.0;
+        suffix_i++;
+    }
+
+    return suffixes[suffix_i];
+}
+
+/* Print the read statistics gathered for `file` to `stream`.
+ *
+ * stream: destination stream; must not be NULL.
+ * file:   ros3 file whose `raw`/`meta` bins are reported; it, its request
+ *         handle and the handle's parsed URL must not be NULL.
+ *
+ * Output: the file's URL, total read and byte counts, then -- only if at
+ * least one read was recorded -- a min/avg/max overview and one row per
+ * non-empty bin.
+ *
+ * Returns SUCCEED, or FAIL if any of the pointers above is NULL.
+ */
+static herr_t
+ros3_fprint_stats(FILE *stream,
+                  const H5FD_ros3_t *file)
+{
+    herr_t ret_value = SUCCEED;
+    parsed_url_t *purl = NULL;
+    unsigned i = 0;
+    /* counters are summed from `unsigned long long` bin fields and printed
+     * with %llu, so they must be `unsigned long long` themselves
+     */
+    unsigned long long count_meta = 0;
+    unsigned long long count_raw = 0;
+    double average_meta = 0.0;
+    double average_raw = 0.0;
+    unsigned long long min_meta = (unsigned long long)ROS3_STATS_STARTING_MIN;
+    unsigned long long min_raw = (unsigned long long)ROS3_STATS_STARTING_MIN;
+    unsigned long long max_meta = 0;
+    unsigned long long max_raw = 0;
+    unsigned long long bytes_raw = 0;
+    unsigned long long bytes_meta = 0;
+    double re_dub = 0.0; /* re-usable double variable */
+    char re_suffix = ' '; /* suffix matching re_dub */
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+    if (stream == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "file stream cannot be null" );
+    }
+    if (file == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "file cannot be null");
+    }
+    if (file->s3r_handle == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "s3 request handle cannot be null");
+    }
+    if (file->s3r_handle->purl == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "parsed url structure cannot be null");
+    }
+    purl = file->s3r_handle->purl;
+
+    /******************
+     * PRINT FILENAME *
+     ******************/
+
+    HDfprintf(stream, "stats for %s://%s", purl->scheme, purl->host);
+    if (purl->port != NULL && purl->port[0] != '\0')
+        HDfprintf(stream, ":%s", purl->port);
+    if (purl->query != NULL && purl->query[0] != '\0') {
+        /* a query always needs a leading "/", with or without a path */
+        if (purl->path != NULL && purl->path[0] != '\0')
+            HDfprintf(stream, "/%s", purl->path);
+        else
+            HDfprintf(stream, "/");
+        HDfprintf(stream, "?%s", purl->query);
+    } else if (purl->path != NULL && purl->path[0] != '\0') {
+        HDfprintf(stream, "/%s", purl->path);
+    }
+    HDfprintf(stream, "\n");
+
+    /*******************
+     * AGGREGATE STATS *
+     *******************/
+
+    for (i = 0; i <= ROS3_STATS_BIN_COUNT; i++) {
+        const ros3_statsbin *r = &file->raw[i];
+        const ros3_statsbin *m = &file->meta[i];
+
+        if (m->min < min_meta) min_meta = m->min;
+        if (r->min < min_raw) min_raw = r->min;
+        if (m->max > max_meta) max_meta = m->max;
+        if (r->max > max_raw) max_raw = r->max;
+
+        count_raw += r->count;
+        count_meta += m->count;
+        bytes_raw += r->bytes;
+        bytes_meta += m->bytes;
+    }
+    if (count_raw > 0)
+        average_raw = (double)bytes_raw / (double)count_raw;
+    if (count_meta > 0)
+        average_meta = (double)bytes_meta / (double)count_meta;
+
+    /******************
+     * PRINT OVERVIEW *
+     ******************/
+
+    HDfprintf(stream, "TOTAL READS: %llu (%llu meta, %llu raw)\n",
+              count_raw + count_meta, count_meta, count_raw);
+    HDfprintf(stream, "TOTAL BYTES: %llu (%llu meta, %llu raw)\n",
+              bytes_raw + bytes_meta, bytes_meta, bytes_raw);
+
+    /* nothing further to tabulate when no reads were recorded */
+    if (count_raw + count_meta == 0)
+        goto done;
+
+    /*************************
+     * PRINT AGGREGATE STATS *
+     *************************/
+
+    HDfprintf(stream, "SIZES meta raw\n");
+    HDfprintf(stream, " min ");
+    if (count_meta == 0) {
+        HDfprintf(stream, " 0.000 ");
+    } else {
+        re_dub = (double)min_meta;
+        re_suffix = ros3_stats_scale(&re_dub);
+        HDfprintf(stream, "%8.3lf%c ", re_dub, re_suffix);
+    }
+
+    if (count_raw == 0) {
+        HDfprintf(stream, " 0.000 \n");
+    } else {
+        re_dub = (double)min_raw;
+        re_suffix = ros3_stats_scale(&re_dub);
+        HDfprintf(stream, "%8.3lf%c\n", re_dub, re_suffix);
+    }
+
+    HDfprintf(stream, " avg ");
+    re_dub = (double)average_meta;
+    re_suffix = ros3_stats_scale(&re_dub);
+    HDfprintf(stream, "%8.3lf%c ", re_dub, re_suffix);
+
+    re_dub = (double)average_raw;
+    re_suffix = ros3_stats_scale(&re_dub);
+    HDfprintf(stream, "%8.3lf%c\n", re_dub, re_suffix);
+
+    HDfprintf(stream, " max ");
+    re_dub = (double)max_meta;
+    re_suffix = ros3_stats_scale(&re_dub);
+    HDfprintf(stream, "%8.3lf%c ", re_dub, re_suffix);
+
+    re_dub = (double)max_raw;
+    re_suffix = ros3_stats_scale(&re_dub);
+    HDfprintf(stream, "%8.3lf%c\n", re_dub, re_suffix);
+
+    /******************************
+     * PRINT INDIVIDUAL BIN STATS *
+     ******************************/
+
+    HDfprintf(stream,
+              "BINS # of reads total bytes average size\n");
+    HDfprintf(stream,
+              " up-to meta raw meta raw meta raw\n");
+
+    for (i = 0; i <= ROS3_STATS_BIN_COUNT; i++) {
+        const ros3_statsbin *m = &file->meta[i];
+        const ros3_statsbin *r = &file->raw[i];
+        unsigned long long range_end = 0;
+        char bm_suffix = ' '; /* bytes-meta */
+        double bm_val = 0.0;
+        char br_suffix = ' '; /* bytes-raw */
+        double br_val = 0.0;
+        char am_suffix = ' '; /* average-meta */
+        double am_val = 0.0;
+        char ar_suffix = ' '; /* average-raw */
+        double ar_val = 0.0;
+
+        /* bins without any reads are not printed */
+        if (r->count == 0 && m->count == 0)
+            continue;
+
+        /* The boundary table has only ROS3_STATS_BIN_COUNT entries; the
+         * overflow bin has no boundary of its own and is labelled with the
+         * last real boundary, prefixed by ">".
+         */
+        if (i == ROS3_STATS_BIN_COUNT) {
+            range_end = ros3_stats_boundaries[ROS3_STATS_BIN_COUNT - 1];
+            HDfprintf(stream, ">");
+        } else {
+            range_end = ros3_stats_boundaries[i];
+            HDfprintf(stream, " ");
+        }
+
+        bm_val = (double)m->bytes;
+        bm_suffix = ros3_stats_scale(&bm_val);
+
+        br_val = (double)r->bytes;
+        br_suffix = ros3_stats_scale(&br_val);
+
+        if (m->count > 0)
+            am_val = (double)(m->bytes) / (double)(m->count);
+        am_suffix = ros3_stats_scale(&am_val);
+
+        if (r->count > 0)
+            ar_val = (double)(r->bytes) / (double)(r->count);
+        ar_suffix = ros3_stats_scale(&ar_val);
+
+        re_dub = (double)range_end;
+        re_suffix = ros3_stats_scale(&re_dub);
+
+        /* counts are `unsigned long long`, hence %llu rather than %d */
+        HDfprintf(stream,
+                  " %8.3f%c %7llu %7llu %8.3f%c %8.3f%c %8.3f%c %8.3f%c\n",
+                  re_dub, re_suffix, /* bin ceiling */
+                  m->count, /* metadata reads */
+                  r->count, /* rawdata reads */
+                  bm_val, bm_suffix, /* metadata bytes */
+                  br_val, br_suffix, /* rawdata bytes */
+                  am_val, am_suffix, /* metadata average */
+                  ar_val, ar_suffix); /* rawdata average */
+
+        fflush(stream);
+    }
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value);
+
+} /* ros3_fprint_stats */
+#endif /* ROS3_STATS */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_ros3_close()
+ *
+ * Purpose:
+ *
+ * Close an HDF5 file.
+ *
+ * When built with H5_HAVE_ROS3_VFD, this closes the S3 request handle
+ * with H5FD_s3comms_s3r_close(), prints the read statistics to stdout
+ * (ROS3_STATS builds only), releases the H5FD_ros3_t structure, and
+ * finally calls curl_global_cleanup().
+ *
+ * When built without H5_HAVE_ROS3_VFD, no work is done and the
+ * function reports FAIL.
+ *
+ * NOTE(review): the error branches appear to jump straight to `done`,
+ * which would skip the release of `file` while still running
+ * curl_global_cleanup() -- confirm this is intended for a failed close.
+ *
+ * NOTE(review): ros3_fprint_stats() is handed `file` after
+ * file->s3r_handle has been passed to H5FD_s3comms_s3r_close() --
+ * verify that the stats printer does not read the closed handle.
+ *
+ * Return:
+ *
+ * SUCCEED/FAIL
+ *
+ * Programmer: Jacob Smith
+ * 2017-11-02
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_ros3_close(H5FD_t *_file)
+{
+#ifdef H5_HAVE_ROS3_VFD
+ H5FD_ros3_t *file = (H5FD_ros3_t *)_file;
+ herr_t ret_value = SUCCEED;
+#else
+ herr_t ret_value = FAIL; /* driver not built: closing always reports failure */
+#endif
+
+
+
+ FUNC_ENTER_NOAPI_NOINIT
+#ifdef H5_HAVE_ROS3_VFD
+
+#if ROS3_DEBUG
+ HDfprintf(stdout, "H5FD_ros3_close() called.\n");
+#endif
+
+ /* Sanity checks
+ */
+ HDassert(file != NULL);
+ HDassert(file->s3r_handle != NULL);
+
+ /* Close the underlying request handle
+ */
+ if (FAIL == H5FD_s3comms_s3r_close(file->s3r_handle)) {
+ HGOTO_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL,
+ "unable to close S3 request handle")
+ }
+
+#if ROS3_STATS
+ /* TODO: mechanism to re-target stats printout */
+ if (FAIL == ros3_fprint_stats(stdout, file)) {
+ HGOTO_ERROR(H5E_INTERNAL, H5E_ERROR, FAIL,
+ "problem while writing file statistics")
+ }
+#endif /* ROS3_STATS */
+
+ /* Release the file info
+ */
+ file = H5FL_FREE(H5FD_ros3_t, file); /* result is stored back into `file` */
+
+done:
+ curl_global_cleanup(); /* cleanup to answer init on open */
+#endif /* H5_HAVE_ROS3_VFD */
+
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* end H5FD_ros3_close() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_ros3_cmp()
+ *
+ * Purpose:
+ *
+ *     Decides whether two files belonging to this driver refer to the
+ *     same resource opened with the same credentials. The following
+ *     components are checked, in this order, stopping at the first
+ *     difference:
+ *
+ *     + url scheme
+ *     + url host
+ *     + url port
+ *     + url path
+ *     + url query
+ *     + fapl aws_region
+ *     + fapl secret_id
+ *     + fapl secret_key
+ *
+ *     tl;dr -> check URL, check credentials
+ *
+ *     Scheme and host are required (asserted non-NULL). Port, path and
+ *     query are optional pointers: two absent values match each other,
+ *     an absent value never matches a present one. The three FAPL
+ *     strings are fixed-size arrays, where "absent" means empty string.
+ *
+ *     When the library is built without H5_HAVE_ROS3_VFD nothing is
+ *     compared and 0 is returned.
+ *
+ * Return:
+ *
+ *     - Equivalent:      0
+ *     - Not Equivalent: -1
+ *
+ * Programmer: Jacob Smith
+ *             2017-11-06
+ *
+ * Changes:
+ *
+ *     + Change from strcmp-like return values (-1, 0, 1) to instead return
+ *       binary equivalence (0) or inequality (-1).
+ *     + Replace "if still equal then check this" waterfall with GOTO jumps.
+ *             Jacob Smith 2018-05-17
+ *
+ *     + FAPL string checks tested `f1` twice instead of `f1` and `f2`;
+ *       corrected so the "both present" branch really requires both.
+ *
+ *-------------------------------------------------------------------------
+ */
+static int
+H5FD_ros3_cmp(const H5FD_t *_f1,
+              const H5FD_t *_f2)
+{
+#ifdef H5_HAVE_ROS3_VFD
+    const H5FD_ros3_t  *f1    = (const H5FD_ros3_t *)_f1;
+    const H5FD_ros3_t  *f2    = (const H5FD_ros3_t *)_f2;
+    const parsed_url_t *purl1 = NULL;
+    const parsed_url_t *purl2 = NULL;
+#endif
+    int ret_value = 0;
+
+
+
+    FUNC_ENTER_NOAPI_NOINIT_NOERR
+#ifdef H5_HAVE_ROS3_VFD
+
+#if ROS3_DEBUG
+    HDfprintf(stdout, "H5FD_ros3_cmp() called.\n");
+#endif
+
+    HDassert(f1->s3r_handle != NULL);
+    HDassert(f2->s3r_handle != NULL);
+
+    purl1 = (const parsed_url_t *)f1->s3r_handle->purl;
+    purl2 = (const parsed_url_t *)f2->s3r_handle->purl;
+    HDassert(purl1 != NULL);
+    HDassert(purl2 != NULL);
+    HDassert(purl1->scheme != NULL);
+    HDassert(purl2->scheme != NULL);
+    HDassert(purl1->host != NULL);
+    HDassert(purl2->host != NULL);
+
+    /* URL: SCHEME */
+    if (strcmp(purl1->scheme, purl2->scheme)) {
+        HGOTO_DONE(-1);
+    }
+
+    /* URL: HOST */
+    if (strcmp(purl1->host, purl2->host)) {
+        HGOTO_DONE(-1);
+    }
+
+    /* URL: PORT */
+    if (purl1->port && purl2->port) {
+        if (strcmp(purl1->port, purl2->port)) {
+            HGOTO_DONE(-1);
+        }
+    } else if (purl1->port) {
+        HGOTO_DONE(-1);
+    } else if (purl2->port) {
+        HGOTO_DONE(-1);
+    }
+
+    /* URL: PATH */
+    if (purl1->path && purl2->path) {
+        if (strcmp(purl1->path, purl2->path)) {
+            HGOTO_DONE(-1);
+        }
+    } else if (purl1->path && !purl2->path) {
+        HGOTO_DONE(-1);
+    } else if (purl2->path && !purl1->path) {
+        HGOTO_DONE(-1);
+    }
+
+    /* URL: QUERY */
+    if (purl1->query && purl2->query) {
+        if (strcmp(purl1->query, purl2->query)) {
+            HGOTO_DONE(-1);
+        }
+    } else if (purl1->query && !purl2->query) {
+        HGOTO_DONE(-1);
+    } else if (purl2->query && !purl1->query) {
+        HGOTO_DONE(-1);
+    }
+
+    /* FAPL: AWS_REGION
+     * (both present -> compare; exactly one present -> different)
+     */
+    if (f1->fa.aws_region[0] != '\0' && f2->fa.aws_region[0] != '\0') {
+        if (strcmp(f1->fa.aws_region, f2->fa.aws_region)) {
+            HGOTO_DONE(-1);
+        }
+    } else if (f1->fa.aws_region[0] != '\0') {
+        HGOTO_DONE(-1);
+    } else if (f2->fa.aws_region[0] != '\0') {
+        HGOTO_DONE(-1);
+    }
+
+    /* FAPL: SECRET_ID */
+    if (f1->fa.secret_id[0] != '\0' && f2->fa.secret_id[0] != '\0') {
+        if (strcmp(f1->fa.secret_id, f2->fa.secret_id)) {
+            HGOTO_DONE(-1);
+        }
+    } else if (f1->fa.secret_id[0] != '\0') {
+        HGOTO_DONE(-1);
+    } else if (f2->fa.secret_id[0] != '\0') {
+        HGOTO_DONE(-1);
+    }
+
+    /* FAPL: SECRET_KEY */
+    if (f1->fa.secret_key[0] != '\0' && f2->fa.secret_key[0] != '\0') {
+        if (strcmp(f1->fa.secret_key, f2->fa.secret_key)) {
+            HGOTO_DONE(-1);
+        }
+    } else if (f1->fa.secret_key[0] != '\0') {
+        HGOTO_DONE(-1);
+    } else if (f2->fa.secret_key[0] != '\0') {
+        HGOTO_DONE(-1);
+    }
+#endif /* H5_HAVE_ROS3_VFD */
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_ros3_cmp() */
+
+
+/*-------------------------------------------------------------------------
+ * Function:   H5FD_ros3_query
+ *
+ * Purpose:    Report the VFL feature flags this driver supports
+ *             (listed in H5FDpublic.h).
+ *
+ *             Nothing is "set" on the file: the supported flags are
+ *             written to the out-pointer `flags`, and only when that
+ *             pointer is non-NULL. The file argument is not examined.
+ *
+ *             Because the ROS3 VFD is read only, most feature flags are
+ *             irrelevant; the only one reported is H5FD_FEAT_DATA_SIEVE.
+ *
+ * Return:     SUCCEED (Can't fail)
+ *
+ * Programmer: John Mainzer
+ *             9/11/17
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_ros3_query(const H5FD_t H5_ATTR_UNUSED *_file,
+                unsigned long *flags /* out */)
+{
+    FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+#if ROS3_DEBUG
+    HDfprintf(stdout, "H5FD_ros3_query() called.\n");
+#endif
+
+    /* A NULL out-pointer is tolerated: there is simply nothing to report to */
+    if (flags != NULL) {
+        /* data sieving is the single feature advertised by this driver */
+        *flags = H5FD_FEAT_DATA_SIEVE;
+    }
+
+    FUNC_LEAVE_NOAPI(SUCCEED)
+
+} /* H5FD_ros3_query() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_ros3_get_eoa()
+ *
+ * Purpose:
+ *
+ * Gets the end-of-address marker for the file. The EOA marker
+ * is the first address past the last byte allocated in the
+ * format address space.
+ *
+ * This is a plain accessor: it hands back the `eoa` field of the
+ * driver's file structure. The memory type argument is ignored.
+ *
+ * Return:
+ *
+ * The end-of-address marker.
+ *
+ * Programmer: Jacob Smith
+ * 2017-11-02
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static haddr_t
+H5FD_ros3_get_eoa(const H5FD_t *_file,
+ H5FD_mem_t H5_ATTR_UNUSED type)
+{
+ const H5FD_ros3_t *file = (const H5FD_ros3_t *)_file; /* view the generic handle as this driver's struct */
+
+ FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+#if ROS3_DEBUG
+ HDfprintf(stdout, "H5FD_ros3_get_eoa() called.\n");
+#endif
+
+ FUNC_LEAVE_NOAPI(file->eoa)
+
+} /* end H5FD_ros3_get_eoa() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_ros3_set_eoa()
+ *
+ * Purpose:
+ *
+ * Set the end-of-address marker for the file.
+ *
+ * The new value is stored in the `eoa` field of the driver's file
+ * structure without any validation. The memory type argument is ignored.
+ *
+ * Return:
+ *
+ * SUCCEED (can't fail)
+ *
+ * Programmer: Jacob Smith
+ * 2017-11-03
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_ros3_set_eoa(H5FD_t *_file,
+ H5FD_mem_t H5_ATTR_UNUSED type,
+ haddr_t addr)
+{
+ H5FD_ros3_t *file = (H5FD_ros3_t *)_file; /* view the generic handle as this driver's struct */
+
+ FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+#if ROS3_DEBUG
+ HDfprintf(stdout, "H5FD_ros3_set_eoa() called.\n");
+#endif
+
+ file->eoa = addr;
+
+ FUNC_LEAVE_NOAPI(SUCCEED)
+
+} /* H5FD_ros3_set_eoa() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_ros3_get_eof()
+ *
+ * Purpose:
+ *
+ * Returns the end-of-file marker.
+ *
+ * The value is whatever H5FD_s3comms_s3r_get_filesize() reports for
+ * the file's S3 request handle. The memory type argument is ignored.
+ *
+ * Return:
+ *
+ * EOF: the first address past the end of the "file", either the
+ * filesystem file or the HDF5 file.
+ *
+ * Programmer: Jacob Smith
+ * 2017-11-02
+ *
+ *-------------------------------------------------------------------------
+ */
+static haddr_t
+H5FD_ros3_get_eof(const H5FD_t *_file,
+ H5FD_mem_t H5_ATTR_UNUSED type)
+{
+ const H5FD_ros3_t *file = (const H5FD_ros3_t *)_file; /* view the generic handle as this driver's struct */
+
+ FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+#if ROS3_DEBUG
+ HDfprintf(stdout, "H5FD_ros3_get_eof() called.\n");
+#endif
+
+ FUNC_LEAVE_NOAPI(H5FD_s3comms_s3r_get_filesize(file->s3r_handle))
+
+} /* end H5FD_ros3_get_eof() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_ros3_get_handle()
+ *
+ * Purpose:
+ *
+ *     Hands the caller the S3 Request handle (s3r_t) held by a ros3
+ *     file, by storing it through the out-pointer `file_handle`.
+ *     The `fapl` argument is not used.
+ *
+ * Returns:
+ *
+ *     SUCCEED: `*file_handle` now holds the file's `s3r_handle`.
+ *     FAIL:    `file_handle` was NULL; nothing was written.
+ *
+ * Programmer: Jacob Smith
+ *             2017-11-02
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_ros3_get_handle(H5FD_t *_file,
+                     hid_t H5_ATTR_UNUSED fapl,
+                     void **file_handle)
+{
+    H5FD_ros3_t *ros3_file = (H5FD_ros3_t *)_file;
+    herr_t       ret_value = SUCCEED;
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#if ROS3_DEBUG
+    HDfprintf(stdout, "H5FD_ros3_get_handle() called.\n");
+#endif
+
+    /* guard: without somewhere to store the handle there is nothing to do */
+    if (NULL == file_handle) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "file handle not valid")
+    }
+
+    *file_handle = ros3_file->s3r_handle;
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* end H5FD_ros3_get_handle() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_ros3_read()
+ *
+ * Purpose:
+ *
+ *     Reads SIZE bytes of data from FILE beginning at address ADDR
+ *     into buffer BUF according to data transfer properties in DXPL_ID.
+ *
+ *     The requested range [ADDR, ADDR + SIZE) must lie entirely within
+ *     the size reported by H5FD_s3comms_s3r_get_filesize(). The range
+ *     test never computes ADDR + SIZE, so an extreme ADDR or SIZE cannot
+ *     wrap around and slip past the check.
+ *
+ *     In ROS3_STATS builds every successful read is also tallied: the
+ *     read is assigned to the first bin whose boundary exceeds SIZE (or
+ *     to the overflow bin at index ROS3_STATS_BIN_COUNT), in the raw
+ *     table when TYPE is H5FD_MEM_DRAW and in the meta table otherwise,
+ *     and that bin's count, byte total, minimum and maximum are updated.
+ *
+ * Return:
+ *
+ *     Success: `SUCCEED`
+ *         - Result is stored in caller-supplied buffer BUF.
+ *     Failure: `FAIL`
+ *         - Requested range is not within the file, or
+ *         - Unable to complete read.
+ *         - Contents of buffer `buf` are undefined.
+ *
+ * Programmer: Jacob Smith
+ *             2017-11-??
+ *
+ * Changes:
+ *
+ *     + Make the range check immune to unsigned wrap-around of
+ *       `addr + size`.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_ros3_read(H5FD_t *_file,
+               H5FD_mem_t H5_ATTR_UNUSED type,
+               hid_t H5_ATTR_UNUSED dxpl_id,
+               haddr_t addr, /* start offset */
+               size_t size, /* length of read */
+               void *buf) /* out */
+{
+    H5FD_ros3_t *file = (H5FD_ros3_t *)_file;
+    size_t filesize = 0;
+    herr_t ret_value = SUCCEED; /* Return value */
+#if ROS3_STATS
+    /* working variables for storing stats */
+    ros3_statsbin *bin = NULL;
+    unsigned bin_i = 0;
+#endif /* ROS3_STATS */
+
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#if ROS3_DEBUG
+    HDfprintf(stdout, "H5FD_ros3_read() called.\n");
+#endif
+
+    HDassert(file != NULL);
+    HDassert(file->s3r_handle != NULL);
+    HDassert(buf != NULL);
+
+    filesize = H5FD_s3comms_s3r_get_filesize(file->s3r_handle);
+
+    /* Once `addr <= filesize` is known, `filesize - addr` cannot wrap;
+     * comparing `size` against that remainder is the overflow-safe
+     * equivalent of `addr + size > filesize`.
+     */
+    if ((addr > filesize) || (size > (filesize - addr))) {
+        HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL, "range exceeds file address")
+    }
+
+    if (FAIL == H5FD_s3comms_s3r_read(file->s3r_handle, addr, size, buf) ) {
+        HGOTO_ERROR(H5E_VFL, H5E_READERROR, FAIL, "unable to execute read")
+    }
+
+#if ROS3_STATS
+
+    /* Find which "bin" this read fits in. Can be "overflow" bin.
+     * (If no boundary exceeds `size`, the loop ends with
+     * bin_i == ROS3_STATS_BIN_COUNT, the overflow slot.)
+     */
+    for (bin_i = 0; bin_i < ROS3_STATS_BIN_COUNT; bin_i++) {
+        if ((unsigned long long)size < ros3_stats_boundaries[bin_i])
+            break;
+    }
+    bin = (type == H5FD_MEM_DRAW)
+        ? &file->raw[bin_i]
+        : &file->meta[bin_i];
+
+    /* Store collected stats in appropriate bin
+     */
+    if (bin->count == 0) {
+        /* first read in this bin defines both extremes */
+        bin->min = size;
+        bin->max = size;
+    } else {
+        if (size < bin->min)
+            bin->min = size;
+        if (size > bin->max)
+            bin->max = size;
+    }
+    bin->count++;
+    bin->bytes += (unsigned long long)size;
+
+#endif /* ROS3_STATS */
+
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* end H5FD_ros3_read() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_ros3_write()
+ *
+ * Purpose:
+ *
+ * Write bytes to file.
+ * UNSUPPORTED IN READ-ONLY ROS3 VFD.
+ *
+ * Every argument is ignored; the function only records an
+ * H5E_UNSUPPORTED error and leaves.
+ *
+ * Return:
+ *
+ * FAIL (Not possible with Read-Only S3 file.)
+ *
+ * Programmer: Jacob Smith
+ * 2017-10-23
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_ros3_write(H5FD_t H5_ATTR_UNUSED *_file,
+ H5FD_mem_t H5_ATTR_UNUSED type,
+ hid_t H5_ATTR_UNUSED dxpl_id,
+ haddr_t H5_ATTR_UNUSED addr,
+ size_t H5_ATTR_UNUSED size,
+ const void H5_ATTR_UNUSED *buf)
+{
+ herr_t ret_value = FAIL; /* there is no success path in this function */
+
+ FUNC_ENTER_NOAPI_NOINIT
+
+#if ROS3_DEBUG
+ HDfprintf(stdout, "H5FD_ros3_write() called.\n");
+#endif
+
+ HGOTO_ERROR(H5E_VFL, H5E_UNSUPPORTED, FAIL,
+ "cannot write to read-only file.")
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_ros3_write() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_ros3_truncate()
+ *
+ * Purpose:
+ *
+ * Makes sure that the true file size is the same (or larger)
+ * than the end-of-address.
+ *
+ * NOT POSSIBLE ON READ-ONLY S3 FILES.
+ *
+ * Every argument is ignored; the function only records an
+ * H5E_UNSUPPORTED error and leaves. Although `ret_value` starts out
+ * as SUCCEED, the error macro is invoked unconditionally with FAIL.
+ *
+ * Return:
+ *
+ * FAIL (Not possible on Read-Only S3 files.)
+ *
+ * Programmer: Jacob Smith
+ * 2017-10-23
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_ros3_truncate(H5FD_t H5_ATTR_UNUSED *_file,
+ hid_t H5_ATTR_UNUSED dxpl_id,
+ hbool_t H5_ATTR_UNUSED closing)
+{
+ herr_t ret_value = SUCCEED; /* initial value only; the error below is unconditional */
+
+ FUNC_ENTER_NOAPI_NOINIT
+
+#if ROS3_DEBUG
+ HDfprintf(stdout, "H5FD_ros3_truncate() called.\n");
+#endif
+
+ HGOTO_ERROR(H5E_VFL, H5E_UNSUPPORTED, FAIL,
+ "cannot truncate read-only file.")
+
+done:
+ FUNC_LEAVE_NOAPI(ret_value)
+
+} /* end H5FD_ros3_truncate() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_ros3_lock()
+ *
+ * Purpose:
+ *
+ * Place an advisory lock on a file.
+ * No effect on Read-Only S3 file.
+ *
+ * Both arguments are ignored and no state is touched; the body
+ * consists solely of the function enter/leave macros.
+ *
+ * Suggestion: remove lock/unlock from class
+ * > would result in error at H5FD_[un]lock() (H5FD.c)
+ *
+ * Return:
+ *
+ * SUCCEED (No-op always succeeds)
+ *
+ * Programmer: Jacob Smith
+ * 2017-11-03
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_ros3_lock(H5FD_t H5_ATTR_UNUSED *_file,
+ hbool_t H5_ATTR_UNUSED rw)
+{
+ FUNC_ENTER_NOAPI_NOINIT_NOERR
+ FUNC_LEAVE_NOAPI(SUCCEED)
+
+} /* end H5FD_ros3_lock() */
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Function: H5FD_ros3_unlock()
+ *
+ * Purpose:
+ *
+ * Remove the existing lock on the file.
+ * No effect on Read-Only S3 file.
+ *
+ * The argument is ignored and no state is touched; the body
+ * consists solely of the function enter/leave macros.
+ *
+ * Return:
+ *
+ * SUCCEED (No-op always succeeds)
+ *
+ * Programmer: Jacob Smith
+ * 2017-11-03
+ *
+ * Changes: None.
+ *
+ *-------------------------------------------------------------------------
+ */
+static herr_t
+H5FD_ros3_unlock(H5FD_t H5_ATTR_UNUSED *_file)
+{
+ FUNC_ENTER_NOAPI_NOINIT_NOERR
+ FUNC_LEAVE_NOAPI(SUCCEED)
+
+} /* end H5FD_ros3_unlock() */
+
+
diff --git a/src/H5FDros3.h b/src/H5FDros3.h
new file mode 100644
index 0000000..49e757c
--- /dev/null
+++ b/src/H5FDros3.h
@@ -0,0 +1,105 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Read-Only S3 Virtual File Driver (VFD) *
+ * Copyright (c) 2017-2018, The HDF Group. *
+ * *
+ * All rights reserved. *
+ * *
+ * NOTICE: *
+ * All information contained herein is, and remains, the property of The HDF *
+ * Group. The intellectual and technical concepts contained herein are *
+ * proprietary to The HDF Group. Dissemination of this information or *
+ * reproduction of this material is strictly forbidden unless prior written *
+ * permission is obtained from The HDF Group. *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/*
+ * Programmer: John Mainzer
+ * 2017-10-10
+ *
+ * Purpose: The public header file for the ros3 driver.
+ */
+#ifndef H5FDros3_H /* include guard */
+#define H5FDros3_H
+
+#define H5FD_ROS3 (H5FD_ros3_init()) /* expands to a call of H5FD_ros3_init(), which returns an hid_t */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/****************************************************************************
+ *
+ * Structure: H5FD_ros3_fapl_t
+ *
+ * Purpose:
+ *
+ * H5FD_ros3_fapl_t is a public structure that is used to pass S3
+ * authentication data to the appropriate S3 VFD via the FAPL. A pointer
+ * to an instance of this structure is a parameter to H5Pset_fapl_ros3()
+ * and H5Pget_fapl_ros3().
+ *
+ *
+ *
+ * `version` (int32_t)
+ *
+ * Version number of the H5FD_ros3_fapl_t structure. Any instance passed
+ * to the above calls must have a recognized version number, or an error
+ * will be flagged.
+ *
+ * This field should be set to H5FD__CURR_ROS3_FAPL_T_VERSION.
+ *
+ * `authenticate` (hbool_t)
+ *
+ * Flag TRUE or FALSE whether or not requests are to be authenticated
+ * with the AWS4 algorithm.
+ * If TRUE, `aws_region`, `secret_id`, and `secret_key` must be populated.
+ * If FALSE, those three components are unused.
+ *
+ * `aws_region` (char[])
+ *
+ * String: name of the AWS "region" of the host, e.g. "us-east-1".
+ * The array is H5FD__ROS3_MAX_REGION_LEN + 1 bytes long.
+ *
+ * `secret_id` (char[])
+ *
+ * String: "Access ID" for the resource.
+ * The array is H5FD__ROS3_MAX_SECRET_ID_LEN + 1 bytes long.
+ *
+ * `secret_key` (char[])
+ *
+ * String: "Secret Access Key" associated with the ID and resource.
+ * The array is H5FD__ROS3_MAX_SECRET_KEY_LEN + 1 bytes long.
+ *
+ * NOTE(review): the "+ 1" on each array presumably reserves room for the
+ * terminating NUL, making the *_LEN macros the longest string each field
+ * can hold -- confirm against the validation code for this structure.
+ *
+ *
+ *
+ * Programmer: John Mainzer
+ *
+ * Changes:
+ *
+ * - Add documentation of fields (except `version`)
+ * --- Jacob Smith 2017-12-04
+ *
+ ****************************************************************************/
+
+#define H5FD__CURR_ROS3_FAPL_T_VERSION 1 /* value expected in the `version` field */
+
+#define H5FD__ROS3_MAX_REGION_LEN 32 /* sizes `aws_region` (array adds one byte) */
+#define H5FD__ROS3_MAX_SECRET_ID_LEN 128 /* sizes `secret_id` (array adds one byte) */
+#define H5FD__ROS3_MAX_SECRET_KEY_LEN 128 /* sizes `secret_key` (array adds one byte) */
+
+typedef struct H5FD_ros3_fapl_t {
+ int32_t version; /* structure version number */
+ hbool_t authenticate; /* whether requests are to be authenticated */
+ char aws_region[H5FD__ROS3_MAX_REGION_LEN + 1]; /* AWS region name */
+ char secret_id[H5FD__ROS3_MAX_SECRET_ID_LEN + 1]; /* access ID */
+ char secret_key[H5FD__ROS3_MAX_SECRET_KEY_LEN + 1]; /* secret access key */
+} H5FD_ros3_fapl_t;
+
+H5_DLL hid_t H5FD_ros3_init(void); /* invoked through the H5FD_ROS3 macro */
+H5_DLL herr_t H5Pget_fapl_ros3(hid_t fapl_id, H5FD_ros3_fapl_t * fa_out); /* takes a pointer to an H5FD_ros3_fapl_t; presumably filled in from the FAPL -- confirm */
+H5_DLL herr_t H5Pset_fapl_ros3(hid_t fapl_id, H5FD_ros3_fapl_t * fa); /* takes a pointer to an H5FD_ros3_fapl_t; presumably stored on the FAPL -- confirm */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* ifndef H5FDros3_H */
+
+
diff --git a/src/H5FDs3comms.c b/src/H5FDs3comms.c
new file mode 100644
index 0000000..7caeacb
--- /dev/null
+++ b/src/H5FDs3comms.c
@@ -0,0 +1,3770 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Read-Only S3 Virtual File Driver (VFD) *
+ * Copyright (c) 2017-2018, The HDF Group. *
+ * *
+ * All rights reserved. *
+ * *
+ * NOTICE: *
+ * All information contained herein is, and remains, the property of The HDF *
+ * Group. The intellectual and technical concepts contained herein are *
+ * proprietary to The HDF Group. Dissemination of this information or *
+ * reproduction of this material is strictly forbidden unless prior written *
+ * permission is obtained from The HDF Group. *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/*****************************************************************************
+ * Source for S3 Communications module
+ *
+ * ***NOT A FILE DRIVER***
+ *
+ * Provide functions and structures required for interfacing with Amazon
+ * Simple Storage Service (S3).
+ *
+ * Provide S3 object access as if it were a local file.
+ *
+ * Connect to remote host, send and receive HTTP requests and responses
+ * as part of the AWS REST API, authenticating requests as appropriate.
+ *
+ * Programmer: Jacob Smith
+ * 2017-11-30
+ *
+ *****************************************************************************/
+
+/****************/
+/* Module Setup */
+/****************/
+
+/***********/
+/* Headers */
+/***********/
+
+#include "H5private.h" /* generic functions */
+#include "H5Eprivate.h" /* error handling */
+#include "H5MMprivate.h" /* memory management */
+#include "H5FDs3comms.h" /* S3 Communications */
+
+/****************/
+/* Local Macros */
+/****************/
+
+/* toggle debugging (enable with 1)
+ *
+ * When non-zero, the `#if S3COMMS_DEBUG` sections of this file are
+ * compiled in; the ones visible here print trace output to stdout.
+ */
+#define S3COMMS_DEBUG 0
+
+/* manipulate verbosity of CURL output
+ * operates separately from S3COMMS_DEBUG
+ *
+ * 0 -> no explicit curl output
+ * 1 -> on error, print failure info to stderr
+ * 2 -> in addition to above, print information for all performs; sets all
+ * curl handles with CURLOPT_VERBOSE
+ */
+#define S3COMMS_CURL_VERBOSITY 0
+
+/* size to allocate for "bytes=<first_byte>[-<last_byte>]" HTTP Range value
+ */
+#define S3COMMS_MAX_RANGE_STRING_SIZE 128
+
+/******************/
+/* Local Typedefs */
+/******************/
+
+/********************/
+/* Local Structures */
+/********************/
+
+/* struct s3r_datastruct
+ * Structure passed to curl write callback
+ * pointer to data region and record of bytes written (offset)
+ *
+ * curlwritecallback() refuses to write unless `magic` equals
+ * S3COMMS_CALLBACK_DATASTRUCT_MAGIC. It copies each incoming chunk to
+ * `data[size]` and then advances `size` by the chunk length, so `size`
+ * is both the running byte count and the next write offset.
+ * No capacity is stored here: the callback cannot tell how large the
+ * `data` region is.
+ */
+struct s3r_datastruct {
+ unsigned long magic; /* must equal S3COMMS_CALLBACK_DATASTRUCT_MAGIC */
+ char *data; /* destination region written by the callback */
+ size_t size; /* bytes written so far; also the next write offset */
+};
+#define S3COMMS_CALLBACK_DATASTRUCT_MAGIC 0x28c2b2ul
+
+/********************/
+/* Local Prototypes */
+/********************/
+
+size_t curlwritecallback(char *ptr, /* bytes delivered by curl */
+ size_t size, /* multiplied by nmemb to get the byte count */
+ size_t nmemb,
+ void *userdata); /* interpreted as a struct s3r_datastruct pointer */
+
+herr_t H5FD_s3comms_s3r_getsize(s3r_t *handle); /* forward declaration */
+
+/*********************/
+/* Package Variables */
+/*********************/
+
+/*****************************/
+/* Library Private Variables */
+/*****************************/
+
+/*******************/
+/* Local Variables */
+/*******************/
+
+/*************/
+/* Functions */
+/*************/
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: curlwritecallback()
+ *
+ * Purpose:
+ *
+ *     Function called by CURL to write received data.
+ *
+ *     Appends `size * nmemb` bytes from `ptr` to the data region of the
+ *     `struct s3r_datastruct` passed as `userdata`, at the offset recorded
+ *     in that structure, and then advances the recorded offset.
+ *
+ *     Nothing is written (and 0 is returned) when:
+ *     - `userdata` is NULL or does not carry the expected magic number
+ *     - there are bytes to copy but `ptr` or the destination is NULL
+ *
+ *     NOTE(review): the structure records no capacity for its data
+ *     region, so this callback cannot detect a response that is larger
+ *     than the caller's buffer. The caller must guarantee that the
+ *     region can hold everything the transfer may deliver.
+ *
+ * Return:
+ *
+ *     - Number of bytes processed.
+ *         - Should equal number of bytes passed to callback.
+ *         - Failure will result in curl error: CURLE_WRITE_ERROR.
+ *
+ * Programmer: Jacob Smith
+ *             2017-08-17
+ *
+ * Changes:
+ *
+ *     - Reject NULL `userdata`, and NULL source/destination pointers,
+ *       instead of dereferencing them.
+ *
+ *----------------------------------------------------------------------------
+ */
+size_t
+curlwritecallback(char *ptr,
+                  size_t size,
+                  size_t nmemb,
+                  void *userdata)
+{
+    struct s3r_datastruct *sds     = (struct s3r_datastruct *)userdata;
+    size_t                 product = (size * nmemb);
+    size_t                 written = 0;
+
+    /* a missing or foreign userdata pointer must never be written through */
+    if (sds == NULL || sds->magic != S3COMMS_CALLBACK_DATASTRUCT_MAGIC)
+        return written;
+
+    /* with nothing to copy, 0 is the correct "all bytes handled" answer */
+    if (product == 0)
+        return written;
+
+    /* returning fewer bytes than were offered makes curl abort the transfer */
+    if (ptr == NULL || sds->data == NULL)
+        return written;
+
+    HDmemcpy(&(sds->data[sds->size]), ptr, product);
+    sds->size += product;
+    written = product;
+
+    return written;
+
+} /* curlwritecallback */
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: H5FD_s3comms_hrb_node_set()
+ *
+ * Purpose:
+ *
+ *     Create, insert, modify, and remove elements in a field node list.
+ *
+ *     `name` cannot be null; will return FAIL and list will be unaltered.
+ *
+ *     Entries are accessed via the lowercase representation of their name:
+ *     "Host", "host", and "hOSt" would all access the same node,
+ *     but name's case is relevant in HTTP request output.
+ *
+ *     List pointer `L` must always point to either of :
+ *     - header node with lowest alphabetical order (by lowername)
+ *     - NULL, if list is empty
+ *
+ *     Types of operations:
+ *
+ *     - CREATE
+ *         - If `L` is NULL and `name` and `value` are not NULL,
+ *           a new node is created at `L`, starting a list.
+ *     - MODIFY
+ *         - If a node is found with a matching lowercase name and `value`
+ *           is not NULL, the existing name, value, and cat values are released
+ *           and replaced with the new data.
+ *         - No modifications are made to the list pointers.
+ *     - REMOVE
+ *         - If `value` is NULL, will attempt to remove node with matching
+ *           lowercase name.
+ *         - If no match found, returns FAIL and list is not modified.
+ *         - When removing a node, all its resources are released.
+ *         - If removing the last node in the list, list pointer is set to NULL.
+ *     - INSERT
+ *         - If no nodes exists with matching lowercase name and `value`
+ *           is not NULL, a new node is created, inserted into list
+ *           alphabetically by lowercase name.
+ *
+ *     Implementation notes:
+ *
+ *     - All working copies (lowercase name, name, value, "name: value"
+ *       and a blank node) are prepared up front. Each branch below either
+ *       hands a copy over to a list node or releases it; on any failure
+ *       the `done` section releases whatever is still held locally.
+ *     - Characters are passed to tolower() as `unsigned char` values, as
+ *       the C library requires, so bytes outside the 7-bit range are safe.
+ *
+ * Return:
+ *
+ *     - SUCCESS: `SUCCEED`
+ *         - List was successfully modified
+ *     - FAILURE: `FAIL`
+ *         - Unable to perform operation
+ *             - Forbidden (attempting to remove absent node, e.g.)
+ *             - Internal error
+ *
+ * Programmer: Jacob Smith
+ *             2017-09-22
+ *
+ * Changes:
+ *
+ *     - Change return value to herr_t
+ *     - Change list pointer to pointer-to-pointer-to-node
+ *     - Change to use singly-linked list (from twin doubly-linked lists)
+ *       with modification to hrb_node_t
+ *     --- Jake Smith 2017-01-17
+ *
+ *     - Convert characters to `unsigned char` before calling tolower().
+ *
+ *----------------------------------------------------------------------------
+ */
+herr_t
+H5FD_s3comms_hrb_node_set(hrb_node_t **L,
+                          const char *name,
+                          const char *value)
+{
+    size_t      i          = 0;
+    char       *valuecpy   = NULL;
+    char       *namecpy    = NULL;
+    size_t      namelen    = 0;
+    char       *lowername  = NULL;
+    char       *nvcat      = NULL;
+    hrb_node_t *node_ptr   = NULL;
+    hrb_node_t *new_node   = NULL;
+    hbool_t     is_looking = TRUE;
+    herr_t      ret_value  = SUCCEED;
+
+
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#if S3COMMS_DEBUG
+    HDfprintf(stdout, "called H5FD_s3comms_hrb_node_set.\n");
+    HDprintf("NAME: %s\n", name);
+    HDprintf("VALUE: %s\n", value);
+    HDprintf("LIST:\n->");
+    for (node_ptr = (*L); node_ptr != NULL; node_ptr = node_ptr->next)
+        HDfprintf(stdout, "{%s}\n->", node_ptr->cat);
+    HDprintf("(null)\n");
+    fflush(stdout);
+    node_ptr = NULL;
+#endif
+
+    if (name == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "unable to operate on null name.\n");
+    }
+    namelen = HDstrlen(name);
+
+    /***********************
+     * PREPARE ALL STRINGS *
+     **********************/
+
+    /* copy and lowercase name
+     */
+    lowername = (char *)H5MM_malloc(sizeof(char) * (namelen + 1));
+    if (lowername == NULL) {
+        HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL,
+                    "cannot make space for lowercase name copy.\n");
+    }
+    for (i = 0; i < namelen; i++) {
+        /* tolower() is only defined for EOF and unsigned char values */
+        lowername[i] = (char)tolower((int)(unsigned char)name[i]);
+    }
+    lowername[namelen] = 0;
+
+    /* If value supplied, copy name, value, and concatenated "name: value".
+     * If NULL, we will be removing a node or doing nothing, so no need for
+     * copies
+     */
+    if (value != NULL) {
+        size_t valuelen   = HDstrlen(value);
+        size_t catlen     = namelen + valuelen + 2; /* HDstrlen(": ") -> +2 */
+        int    sprint_ret = 0;
+
+        namecpy = (char *)H5MM_malloc(sizeof(char) * (namelen + 1));
+        if (namecpy == NULL) {
+            HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL,
+                        "cannot make space for name copy.\n");
+        }
+        HDmemcpy(namecpy, name, namelen + 1);
+
+        valuecpy = (char *)H5MM_malloc(sizeof(char) * (valuelen + 1));
+        if (valuecpy == NULL) {
+            HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL,
+                        "cannot make space for value copy.\n");
+        }
+        HDmemcpy(valuecpy, value, valuelen + 1);
+
+        nvcat = (char *)H5MM_malloc(sizeof(char) * (catlen + 1));
+        if (nvcat == NULL) {
+            HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL,
+                        "cannot make space for concatenated string.\n");
+        }
+        sprint_ret = HDsnprintf(nvcat, (catlen + 1), "%s: %s", name, value);
+        if (sprint_ret <= 0)
+            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                        "error while concatenating `%s: %s", name, value);
+        HDassert( catlen == (size_t)sprint_ret );
+
+        /* create new_node, should we need it
+         */
+        new_node = (hrb_node_t *)H5MM_malloc(sizeof(hrb_node_t));
+        if (new_node == NULL) {
+            HGOTO_ERROR(H5E_RESOURCE, H5E_NOSPACE, FAIL,
+                        "cannot make space for new set.\n");
+        }
+
+        new_node->magic     = S3COMMS_HRB_NODE_MAGIC;
+        new_node->name      = NULL;
+        new_node->value     = NULL;
+        new_node->cat       = NULL;
+        new_node->lowername = NULL;
+        new_node->next      = NULL;
+    }
+
+    /***************
+     * ACT ON LIST *
+     ***************/
+
+    if (*L == NULL) {
+        if (value == NULL) {
+            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                        "trying to remove node from empty list");
+        } else {
+#if S3COMMS_DEBUG
+HDprintf("CREATE NEW\n"); fflush(stdout);
+#endif
+            /*******************
+             * CREATE NEW LIST *
+             *******************/
+
+            new_node->cat       = nvcat;
+            new_node->name      = namecpy;
+            new_node->lowername = lowername;
+            new_node->value     = valuecpy;
+
+            *L = new_node;
+            goto done; /* bypass further seeking */
+        }
+    }
+
+    /* sanity-check pointer passed in
+     */
+    HDassert( (*L) != NULL );
+    HDassert( (*L)->magic == S3COMMS_HRB_NODE_MAGIC );
+    node_ptr = (*L);
+
+    /* Check whether to modify/remove first node in list
+     */
+    if (strcmp(lowername, node_ptr->lowername) == 0) {
+
+        is_looking = FALSE;
+
+        if (value == NULL) {
+#if S3COMMS_DEBUG
+HDprintf("REMOVE HEAD\n"); fflush(stdout);
+#endif
+            /***************
+             * REMOVE HEAD *
+             ***************/
+
+            *L = node_ptr->next;
+
+#if S3COMMS_DEBUG
+HDprintf("FREEING CAT (node)\n"); fflush(stdout);
+#endif
+            H5MM_xfree(node_ptr->cat);
+#if S3COMMS_DEBUG
+HDprintf("FREEING LOWERNAME (node)\n"); fflush(stdout);
+#endif
+            H5MM_xfree(node_ptr->lowername);
+#if S3COMMS_DEBUG
+HDprintf("FREEING NAME (node)\n"); fflush(stdout);
+#endif
+            H5MM_xfree(node_ptr->name);
+#if S3COMMS_DEBUG
+HDprintf("FREEING VALUE (node)\n"); fflush(stdout);
+#endif
+            H5MM_xfree(node_ptr->value);
+#if S3COMMS_DEBUG
+HDprintf("MAGIC OK? %s\n",
+        (node_ptr->magic == S3COMMS_HRB_NODE_MAGIC) ? "YES" : "NO");
+fflush(stdout);
+#endif
+            HDassert( node_ptr->magic == S3COMMS_HRB_NODE_MAGIC );
+            node_ptr->magic += 1ul; /* spoil the magic before releasing the node */
+#if S3COMMS_DEBUG
+HDprintf("FREEING POINTER\n"); fflush(stdout);
+#endif
+            H5MM_xfree(node_ptr);
+
+#if S3COMMS_DEBUG
+HDprintf("FREEING WORKING LOWERNAME\n"); fflush(stdout);
+#endif
+            H5MM_xfree(lowername); lowername = NULL;
+        } else {
+#if S3COMMS_DEBUG
+HDprintf("MODIFY HEAD\n"); fflush(stdout);
+#endif
+            /***************
+             * MODIFY HEAD *
+             ***************/
+
+            H5MM_xfree(node_ptr->cat);
+            H5MM_xfree(node_ptr->name);
+            H5MM_xfree(node_ptr->value);
+
+            node_ptr->name  = namecpy;
+            node_ptr->value = valuecpy;
+            node_ptr->cat   = nvcat;
+
+            /* the existing node keeps its own lowername; the working copy
+             * and the spare node are not needed
+             */
+            H5MM_xfree(lowername);
+            lowername = NULL;
+            new_node->magic += 1ul;
+            H5MM_xfree(new_node);
+            new_node = NULL;
+        }
+    } else if (strcmp(lowername, node_ptr->lowername) < 0) {
+
+        is_looking = FALSE;
+
+        if (value == NULL) {
+            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                        "trying to remove a node 'before' head");
+        } else {
+#if S3COMMS_DEBUG
+HDprintf("PREPEND NEW HEAD\n"); fflush(stdout);
+#endif
+            /*******************
+             * INSERT NEW HEAD *
+             *******************/
+
+            new_node->name      = namecpy;
+            new_node->value     = valuecpy;
+            new_node->lowername = lowername;
+            new_node->cat       = nvcat;
+            new_node->next      = node_ptr;
+            *L = new_node;
+        }
+    }
+
+    /***************
+     * SEARCH LIST *
+     ***************/
+
+    /* From here on `lowername` sorts strictly after `node_ptr`; each pass
+     * inspects the node that follows it.
+     */
+    while (is_looking) {
+        if (node_ptr->next == NULL) {
+
+            is_looking = FALSE;
+
+            if (value == NULL) {
+                HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                            "trying to remove absent node");
+            } else {
+#if S3COMMS_DEBUG
+HDprintf("APPEND A NODE\n"); fflush(stdout);
+#endif
+                /*******************
+                 * APPEND NEW NODE *
+                 *******************/
+
+                HDassert( strcmp(lowername, node_ptr->lowername) > 0 );
+                new_node->name      = namecpy;
+                new_node->value     = valuecpy;
+                new_node->lowername = lowername;
+                new_node->cat       = nvcat;
+                node_ptr->next      = new_node;
+            }
+        } else if (strcmp(lowername, node_ptr->next->lowername) < 0) {
+
+            is_looking = FALSE;
+
+            if (value == NULL) {
+                HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                            "trying to remove absent node");
+            } else {
+#if S3COMMS_DEBUG
+HDprintf("INSERT A NODE\n"); fflush(stdout);
+#endif
+                /*******************
+                 * INSERT NEW NODE *
+                 *******************/
+
+                HDassert( strcmp(lowername, node_ptr->lowername) > 0 );
+                new_node->name      = namecpy;
+                new_node->value     = valuecpy;
+                new_node->lowername = lowername;
+                new_node->cat       = nvcat;
+                new_node->next      = node_ptr->next;
+                node_ptr->next      = new_node;
+            }
+        } else if (strcmp(lowername, node_ptr->next->lowername) == 0) {
+
+            is_looking = FALSE;
+
+            if (value == NULL) {
+                /*****************
+                 * REMOVE A NODE *
+                 *****************/
+
+                hrb_node_t *tmp = node_ptr->next;
+                node_ptr->next = tmp->next;
+
+#if S3COMMS_DEBUG
+HDprintf("REMOVE A NODE\n"); fflush(stdout);
+#endif
+                H5MM_xfree(tmp->cat);
+                H5MM_xfree(tmp->lowername);
+                H5MM_xfree(tmp->name);
+                H5MM_xfree(tmp->value);
+
+                HDassert( tmp->magic == S3COMMS_HRB_NODE_MAGIC );
+                tmp->magic += 1ul; /* spoil the magic before releasing the node */
+                H5MM_xfree(tmp);
+
+                H5MM_xfree(lowername);
+                lowername = NULL;
+            } else {
+#if S3COMMS_DEBUG
+HDprintf("MODIFY A NODE\n"); fflush(stdout);
+#endif
+                /*****************
+                 * MODIFY A NODE *
+                 *****************/
+
+                node_ptr = node_ptr->next;
+                H5MM_xfree(node_ptr->name);
+                H5MM_xfree(node_ptr->value);
+                H5MM_xfree(node_ptr->cat);
+
+                HDassert( new_node->magic == S3COMMS_HRB_NODE_MAGIC );
+                new_node->magic += 1ul;
+                H5MM_xfree(new_node);
+                H5MM_xfree(lowername);
+                new_node  = NULL;
+                lowername = NULL;
+
+                node_ptr->name  = namecpy;
+                node_ptr->value = valuecpy;
+                node_ptr->cat   = nvcat;
+            }
+        } else {
+            /****************
+             * KEEP LOOKING *
+             ****************/
+
+            node_ptr = node_ptr->next;
+        }
+    }
+
+done:
+    if (ret_value == FAIL) {
+        /* clean up
+         * (on failure no working copy has been handed to the list)
+         */
+        if (nvcat != NULL) H5MM_xfree(nvcat);
+        if (namecpy != NULL) H5MM_xfree(namecpy);
+        if (lowername != NULL) H5MM_xfree(lowername);
+        if (valuecpy != NULL) H5MM_xfree(valuecpy);
+        if (new_node != NULL) {
+            HDassert( new_node->magic == S3COMMS_HRB_NODE_MAGIC );
+            new_node->magic += 1ul;
+            H5MM_xfree(new_node);
+        }
+    }
+
+    FUNC_LEAVE_NOAPI(ret_value);
+
+} /* H5FD_s3comms_hrb_node_set */
+
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: H5FD_s3comms_hrb_destroy()
+ *
+ * Purpose:
+ *
+ * Destroy and free resources _directly_ associated with an HTTP Buffer.
+ *
+ * Takes a pointer to pointer to the buffer structure.
+ * This allows for the pointer itself to be NULLed from within the call.
+ *
+ * If buffer or buffer pointer is NULL, there is no effect.
+ *
+ * Headers list at `first_header` is not touched.
+ *
+ * - Programmer should re-use or destroy `first_header` pointer
+ * (hrb_node_t *) as suits their purposes.
+ * - Recommend fetching prior to destroy()
+ * e.g., `reuse_node = hrb_to_die->first_header; destroy(hrb_to_die);`
+ * or maintaining an external reference.
+ * - Destroy node/list separately as appropriate
+ * - Failure to account for this will result in a memory leak.
+ *
+ * Return:
+ *
+ * - SUCCESS: `SUCCEED`
+ * - successfully released buffer resources
+ * - if `buf` is NULL or `*buf` is NULL, no effect
+ * - FAILURE: `FAIL`
+ * - `buf->magic != S3COMMS_HRB_MAGIC`
+ *
+ * Programmer: Jacob Smith
+ * 2017-07-21
+ *
+ * Changes:
+ *
+ * - Conditional free() of `hrb_node_t` pointer properties based on
+ * `which_free` property.
+ * --- Jacob Smith 2017-08-08
+ *
+ * - Integrate with HDF5.
+ * - Returns herr_t instead of nothing.
+ * --- Jacob Smith 2017-09-21
+ *
+ * - Change argument from *buf to **buf, to null pointer within call
+ * --- Jacob Smith 2017-20-05
+ *
+ *----------------------------------------------------------------------------
+ */
+herr_t
+H5FD_s3comms_hrb_destroy(hrb_t **_buf)
+{
+    hrb_t  *target    = NULL;    /* buffer to release, if there is one */
+    herr_t  ret_value = SUCCEED;
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#if S3COMMS_DEBUG
+    HDfprintf(stdout, "called H5FD_s3comms_hrb_destroy.\n");
+#endif
+
+    /* a NULL argument, or an argument pointing at NULL, is a no-op */
+    if (_buf != NULL) {
+        target = *_buf;
+    }
+
+    if (target != NULL) {
+        if (S3COMMS_HRB_MAGIC != target->magic) {
+            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                        "pointer's magic does not match.\n");
+        }
+
+        /* release only the strings held directly by the buffer;
+         * the header list at `first_header` is not touched here
+         */
+        H5MM_xfree(target->verb);
+        H5MM_xfree(target->version);
+        H5MM_xfree(target->resource);
+
+        /* invalidate the magic before giving the structure back */
+        target->magic += 1ul;
+        H5MM_xfree(target);
+
+        /* null the caller's pointer */
+        *_buf = NULL;
+    }
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_s3comms_hrb_destroy */
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: H5FD_s3comms_hrb_init_request()
+ *
+ * Purpose:
+ *
+ * Create a new HTTP Request Buffer
+ *
+ * All non-null arguments should be null-terminated strings.
+ *
+ * If `verb` is NULL, defaults to "GET".
+ * If `http_version` is NULL, defaults to "HTTP/1.1".
+ *
+ * `resource` cannot be NULL; should be string beginning with slash
+ * character ('/').
+ *
+ * All strings are copied into the structure, making them safe from
+ * modification in source strings.
+ *
+ * Return:
+ *
+ * - SUCCESS: pointer to new `hrb_t`
+ * - FAILURE: `NULL`
+ *
+ * Programmer: Jacob Smith
+ * 2017-07-21
+ *
+ * Changes:
+ *
+ * - Update struct membership for newer 'generic' `hrb_t` format.
+ * --- Jacob Smith, 2017-07-24
+ *
+ * - Rename from `hrb_new()` to `hrb_request()`
+ * --- Jacob Smith, 2017-07-25
+ *
+ * - Integrate with HDF5.
+ * - Rename from 'hrb_request()` to `H5FD_s3comms_hrb_init_request()`.
+ * - Remove `host` from input parameters.
+ * - Host, as with all other fields, must now be added through the
+ * add-field functions.
+ * - Add `version` (HTTP version string, e.g. "HTTP/1.1") to parameters.
+ * --- Jacob Smith 2017-09-20
+ *
+ * - Update to use linked-list `hrb_node_t` headers in structure.
+ * --- Jacob Smith 2017-10-05
+ *
+ *----------------------------------------------------------------------------
+ */
+hrb_t *
+H5FD_s3comms_hrb_init_request(const char *_verb,
+                              const char *_resource,
+                              const char *_http_version)
+{
+    hrb_t  *request   = NULL;
+    char   *res       = NULL;
+    size_t  reslen    = 0;
+    size_t  prefixlen = 0;    /* 1 when a leading '/' must be supplied */
+    hrb_t  *ret_value = NULL;
+    char   *verb      = NULL;
+    size_t  verblen   = 0;
+    char   *vrsn      = NULL;
+    size_t  vrsnlen   = 0;
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#if S3COMMS_DEBUG
+    HDfprintf(stdout, "called H5FD_s3comms_hrb_init_request.\n");
+#endif
+
+    if (_resource == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                    "resource string cannot be null.\n");
+    }
+
+    /* populate valid NULLs with defaults
+     */
+    if (_verb == NULL) {
+        _verb = "GET";
+    }
+    if (_http_version == NULL) {
+        _http_version = "HTTP/1.1";
+    }
+
+    /* allocate the structure and put every member in a known state
+     */
+    request = (hrb_t *)H5MM_malloc(sizeof(hrb_t));
+    if (request == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, NULL,
+                    "no space for request structure");
+    }
+    request->magic        = S3COMMS_HRB_MAGIC;
+    request->body         = NULL;
+    request->body_len     = 0;
+    request->first_header = NULL;
+    request->resource     = NULL;
+    request->verb         = NULL;
+    request->version      = NULL;
+
+    /* copy the resource, prepending '/' when the caller did not supply one;
+     * lengths are known up front, so a plain copy suffices and cannot
+     * truncate
+     */
+    prefixlen = (_resource[0] == '/') ? 0 : 1;
+    reslen    = prefixlen + HDstrlen(_resource) + 1;
+    res       = (char *)H5MM_malloc(sizeof(char) * reslen);
+    if (res == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, NULL,
+                    "no space for resource string");
+    }
+    if (prefixlen > 0) {
+        res[0] = '/';
+    }
+    HDmemcpy(res + prefixlen, _resource, reslen - prefixlen);
+
+    /* copy the verb (terminator included)
+     */
+    verblen = HDstrlen(_verb) + 1;
+    verb    = (char *)H5MM_malloc(sizeof(char) * verblen);
+    if (verb == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, NULL,
+                    "no space for verb string");
+    }
+    HDmemcpy(verb, _verb, verblen);
+
+    /* copy the HTTP version (terminator included)
+     */
+    vrsnlen = HDstrlen(_http_version) + 1;
+    vrsn    = (char *)H5MM_malloc(sizeof(char) * vrsnlen);
+    if (vrsn == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, NULL,
+                    "no space for http-version string");
+    }
+    HDmemcpy(vrsn, _http_version, vrsnlen);
+
+    /* hand ownership of the copies to the structure
+     */
+    request->resource = res;
+    request->verb     = verb;
+    request->version  = vrsn;
+
+    ret_value = request;
+
+done:
+
+    /* on failure nothing has been handed to the caller: release everything
+     */
+    if (ret_value == NULL) {
+        if (request != NULL) {
+            request->magic += 1ul; /* invalidate before release */
+            H5MM_xfree(request);
+        }
+        H5MM_xfree(vrsn);
+        H5MM_xfree(verb);
+        H5MM_xfree(res);
+    }
+
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_s3comms_hrb_init_request */
+
+
+
+/****************************************************************************
+ * S3R FUNCTIONS
+ ****************************************************************************/
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: H5FD_s3comms_s3r_close()
+ *
+ * Purpose:
+ *
+ * Close communications through given S3 Request Handle (`s3r_t`)
+ * and clean up associated resources.
+ *
+ * Return:
+ *
+ * - SUCCESS: `SUCCEED`
+ * - FAILURE: `FAIL`
+ * - fails if handle is null or has invalid magic number
+ *
+ *
+ * Programmer: Jacob Smith
+ * 2017-08-31
+ *
+ * Changes:
+ *
+ * - Remove all messiness related to the now-gone "setopt" utility
+ * as it no longer exists in the handle.
+ * - Return type to `void`.
+ * --- Jacob Smith 2017-09-01
+ *
+ * - Incorporate into HDF environment.
+ * - Rename from `s3r_close()` to `H5FD_s3comms_s3r_close()`.
+ * --- Jacob Smith 2017-10-06
+ *
+ * - Change separate host, resource, port info to `parsed_url_t` struct ptr.
+ * --- Jacob Smith 2017-11-01
+ *
+ *----------------------------------------------------------------------------
+ */
+herr_t
+H5FD_s3comms_s3r_close(s3r_t *handle)
+{
+#ifdef H5_HAVE_ROS3_VFD
+    herr_t purl_status = SUCCEED; /* outcome of releasing the parsed url */
+#endif /* H5_HAVE_ROS3_VFD */
+    herr_t ret_value = SUCCEED;
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#ifdef H5_HAVE_ROS3_VFD
+#if S3COMMS_DEBUG
+    HDfprintf(stdout, "called H5FD_s3comms_s3r_close.\n");
+#endif
+
+    if (handle == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "handle cannot be null.\n");
+    }
+    if (handle->magic != S3COMMS_S3R_MAGIC) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "handle has invalid magic.\n");
+    }
+
+    /* From here on the handle is consumed: every member is released and the
+     * structure itself is freed, whether or not a sub-step reports failure.
+     */
+    curl_easy_cleanup(handle->curlhandle);
+
+    H5MM_xfree(handle->secret_id);
+    H5MM_xfree(handle->region);
+    H5MM_xfree(handle->signing_key);
+
+    HDassert( handle->httpverb != NULL );
+    H5MM_xfree(handle->httpverb);
+
+    /* Remember the result instead of bailing out immediately, so that a
+     * failure here cannot leave behind a handle whose members are already
+     * freed.
+     */
+    purl_status = H5FD_s3comms_free_purl(handle->purl);
+
+    handle->magic += 1ul; /* invalidate before release */
+    H5MM_xfree(handle);
+
+    if (FAIL == purl_status) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "unable to release parsed url structure")
+    }
+
+#endif /* H5_HAVE_ROS3_VFD */
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_s3comms_s3r_close */
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: H5FD_s3comms_s3r_get_filesize()
+ *
+ * Purpose:
+ *
+ * Retrieve the filesize of an open request handle.
+ *
+ * Wrapper "getter" to hide implementation details.
+ *
+ *
+ * Return:
+ *
+ * - SUCCESS: size of file, in bytes, if handle is valid.
+ * - FAILURE: 0, if handle is NULL or undefined.
+ *
+ * Programmer: Jacob Smith 2017-01-14
+ *
+ * Changes: None
+ *
+ *----------------------------------------------------------------------------
+ */
+size_t
+H5FD_s3comms_s3r_get_filesize(s3r_t *handle)
+{
+    size_t ret_value = 0;    /* reported when no size can be read */
+
+    FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+#ifdef H5_HAVE_ROS3_VFD
+    /* plain accessor: a NULL handle yields 0 rather than an error */
+    ret_value = (NULL == handle) ? 0 : handle->filesize;
+#endif /* H5_HAVE_ROS3_VFD */
+
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_s3comms_s3r_get_filesize */
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: H5FD_s3comms_s3r_getsize()
+ *
+ * Purpose:
+ *
+ * Get the number of bytes of handle's target resource.
+ *
+ * Sets up handle and curlhandle to enact an HTTP HEAD request on file,
+ * and parses received headers to extract "Content-Length" from response
+ * headers, storing file size at `handle->filesize`.
+ *
+ * Critical step in opening (initiating) an `s3r_t` handle.
+ *
+ * Wraps `s3r_read()`.
+ * Sets curlhandle to write headers to a temporary buffer (using extant
+ * write callback) and provides no buffer for body.
+ *
+ * Upon exit, unsets HTTP HEAD settings from curl handle, returning to
+ * initial state. In event of error, curl handle state is undefined and is
+ * not to be trusted.
+ *
+ * Return:
+ *
+ * - SUCCESS: `SUCCEED`
+ * - FAILURE: `FAIL`
+ *
+ * Programmer: Jacob Smith
+ * 2017-08-23
+ *
+ * Changes:
+ *
+ * - Update to revised `s3r_t` format and life cycle.
+ * --- Jacob Smith 2017-09-01
+ *
+ * - Conditional change to static header buffer and structure.
+ * --- Jacob Smith 2017-09-05
+ *
+ * - Incorporate into HDF environment.
+ * - Rename from `s3r_getsize()` to `H5FD_s3comms_s3r_getsize()`.
+ * --- Jacob Smith 2017-10-06
+ *
+ *----------------------------------------------------------------------------
+ */
+herr_t
+H5FD_s3comms_s3r_getsize(s3r_t *handle)
+{
+#ifdef H5_HAVE_ROS3_VFD
+    unsigned long int      content_length = 0;
+    CURL                  *curlh          = NULL;
+    char                  *end            = NULL;
+    char                  *headerresponse = NULL;
+    herr_t                 ret_value      = SUCCEED;
+    struct s3r_datastruct  sds            = {
+            S3COMMS_CALLBACK_DATASTRUCT_MAGIC,
+            NULL,
+            0 };
+    char                  *start          = NULL;
+#else
+    herr_t                 ret_value      = FAIL;
+#endif /* H5_HAVE_ROS3_VFD */
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#ifdef H5_HAVE_ROS3_VFD
+
+#if S3COMMS_DEBUG
+    HDfprintf(stdout, "called H5FD_s3comms_s3r_getsize.\n");
+#endif
+
+    if (handle == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "handle cannot be null.\n");
+    }
+    if (handle->magic != S3COMMS_S3R_MAGIC) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "handle has invalid magic.\n");
+    }
+    if (handle->curlhandle == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "handle has bad (null) curlhandle.\n")
+    }
+
+    /********************
+     * PREPARE FOR HEAD *
+     ********************/
+
+    /* `curlh` doubles as the "HEAD settings may be active" marker that the
+     * cleanup section below keys on
+     */
+    curlh = handle->curlhandle;
+
+    if ( CURLE_OK !=
+         curl_easy_setopt(curlh,
+                          CURLOPT_NOBODY,
+                          1L) )
+    {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "error while setting CURL option (CURLOPT_NOBODY). "
+                    "(placeholder flags)");
+    }
+
+    if ( CURLE_OK !=
+         curl_easy_setopt(curlh,
+                          CURLOPT_HEADERDATA,
+                          &sds) )
+    {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "error while setting CURL option (CURLOPT_HEADERDATA). "
+                    "(placeholder flags)");
+    }
+
+    /* the verb buffer is sized generously (16) so that it can later be
+     * overwritten in place with another short verb
+     */
+    HDassert( handle->httpverb == NULL );
+    handle->httpverb = (char *)H5MM_malloc(sizeof(char) * 16);
+    if (handle->httpverb == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL,
+                    "unable to allocate space for S3 request HTTP verb");
+    }
+    HDmemcpy(handle->httpverb, "HEAD", 5);
+
+    /* one extra byte so the received headers can always be NUL-terminated
+     * before they are searched as a C string
+     */
+    headerresponse = (char *)H5MM_malloc(sizeof(char) *
+                                         (CURL_MAX_HTTP_HEADER + 1));
+    if (headerresponse == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL,
+                    "unable to allocate space for curl header response");
+    }
+    headerresponse[0] = '\0';
+    sds.data = headerresponse;
+
+    /*******************
+     * PERFORM REQUEST *
+     *******************/
+
+    /* these parameters fetch the entire file,
+     * but, with a NULL destination and NOBODY and HEADERDATA supplied above,
+     * only http metadata will be sent by server and recorded by s3comms
+     */
+    if (FAIL ==
+        H5FD_s3comms_s3r_read(handle, 0, 0, NULL) )
+    {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "problem in reading during getsize.\n");
+    }
+
+    if (sds.size > CURL_MAX_HTTP_HEADER) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "HTTP metadata buffer overrun\n");
+    } else if (sds.size == 0) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "No HTTP metadata\n");
+#if S3COMMS_DEBUG
+    } else {
+        HDfprintf(stderr, "GETSIZE: OK\n");
+#endif
+    }
+
+    /* NOTE(review): assumes `sds.size` is the number of bytes the write
+     * callback stored in `sds.data` -- confirm against curlwritecallback.
+     * The size check above keeps this index inside the allocation.
+     */
+    headerresponse[sds.size] = '\0';
+
+    /******************
+     * PARSE RESPONSE *
+     ******************/
+
+    start = strstr(headerresponse,
+                   "\r\nContent-Length: ");
+    if (start == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "could not find \"Content-Length\" in response.\n");
+    }
+
+    /* move "start" to beginning of value in line; find end of line
+     */
+    start = start + HDstrlen("\r\nContent-Length: ");
+    end = strstr(start, "\r\n");
+    if (end == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "could not find end of content length line");
+    }
+
+    /* place null terminator at end of numbers
+     */
+    *end = '\0';
+
+    /* strtoul() reports overflow only through errno, so clear any stale
+     * value first; Content-Length is decimal, hence the explicit base 10
+     */
+    errno = 0;
+    content_length = strtoul((const char *)start,
+                             NULL,
+                             10);
+    if (content_length == 0 ||
+        content_length == ULONG_MAX ||
+        errno == ERANGE)
+    {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+            "could not convert found \"Content-Length\" response (\"%s\")",
+            start); /* range is null-terminated, remember */
+    }
+
+    handle->filesize = (size_t)content_length;
+
+done:
+
+    /**********************
+     * UNDO HEAD SETTINGS *
+     **********************/
+
+    /* Performed on success and on failure alike: CURLOPT_HEADERDATA refers
+     * to the stack-local `sds`, which must not outlive this call inside the
+     * curl handle. libcurl expects a `long` for CURLOPT_NOBODY and a pointer
+     * for CURLOPT_HEADERDATA.
+     */
+    if (curlh != NULL) {
+        if ( CURLE_OK !=
+             curl_easy_setopt(curlh,
+                              CURLOPT_NOBODY,
+                              0L) )
+        {
+            HDONE_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                        "error while setting CURL option (CURLOPT_NOBODY). "
+                        "(placeholder flags)");
+        }
+
+        if ( CURLE_OK !=
+             curl_easy_setopt(curlh,
+                              CURLOPT_HEADERDATA,
+                              NULL) )
+        {
+            HDONE_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                        "error while setting CURL option (CURLOPT_HEADERDATA). "
+                        "(placeholder flags)");
+        }
+    }
+
+    H5MM_xfree(headerresponse);
+    sds.magic += 1; /* set to bad magic */
+
+#endif /* H5_HAVE_ROS3_VFD */
+
+    FUNC_LEAVE_NOAPI(ret_value);
+
+} /* H5FD_s3comms_s3r_getsize */
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: H5FD_s3comms_s3r_open()
+ *
+ * Purpose:
+ *
+ * Logically 'open' a file hosted on S3.
+ *
+ * - create new Request Handle
+ * - copy supplied url
+ * - copy authentication info if supplied
+ * - create CURL handle
+ * - fetch size of file
+ * - connect with server and execute HEAD request
+ * - return request handle ready for reads
+ *
+ * To connect on a non-default port, include the port number in `url`.
+ *
+ * To prevent AWS4 authentication, pass null pointer to `region`, `id`,
+ * and `signing_key`.
+ *
+ * Uses `H5FD_s3comms_parse_url()` to validate and parse url input.
+ *
+ * Return:
+ *
+ * - SUCCESS: Pointer to new request handle.
+ * - FAILURE: NULL
+ * - occurs if:
+ * - authentication strings are inconsistent
+ * - must _all_ be null or empty, or _all_ be supplied
+ * - url is NULL (no filename)
+ * - unable to parse url (malformed?)
+ * - error while performing `getsize()`
+ *
+ * Programmer: Jacob Smith
+ * 2017-09-01
+ *
+ * Changes:
+ *
+ * - Incorporate into HDF environment.
+ * - Rename from `s3r_open()` to `H5FD_s3comms_s3r_open()`.
+ * --- Jacob Smith 2017-10-06
+ *
+ * - Remove port number from signature.
+ * - Name (`url`) must be complete url with http scheme and optional port
+ * number in string.
+ * - e.g., "http://bucket.aws.com:9000/myfile.dat?query=param"
+ * - Internal storage of host, resource, and port information moved into
+ * `parsed_url_t` struct pointer.
+ * --- Jacob Smith 2017-11-01
+ *
+ *----------------------------------------------------------------------------
+ */
+s3r_t *
+H5FD_s3comms_s3r_open(const char          *url,
+                      const char          *region,
+                      const char          *id,
+                      const unsigned char *signing_key)
+{
+#ifdef H5_HAVE_ROS3_VFD
+    size_t        tmplen = 0;
+    CURL         *curlh  = NULL;
+    s3r_t        *handle = NULL;
+    parsed_url_t *purl   = NULL;
+#endif
+    s3r_t        *ret_value = NULL;
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#ifdef H5_HAVE_ROS3_VFD
+
+#if S3COMMS_DEBUG
+    HDfprintf(stdout, "called H5FD_s3comms_s3r_open.\n");
+#endif
+
+    if (url == NULL || url[0] == '\0') {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                    "url cannot be null.\n");
+    }
+
+    if (FAIL == H5FD_s3comms_parse_url(url, &purl)) {
+        /* probably a malformed url, but could be internal error */
+        HGOTO_ERROR(H5E_ARGS, H5E_CANTCREATE, NULL,
+                    "unable to create parsed url structure");
+    }
+    HDassert( purl != NULL ); /* if above passes, this must be true */
+    HDassert( purl->magic == S3COMMS_PARSED_URL_MAGIC );
+
+    handle = (s3r_t *)H5MM_malloc(sizeof(s3r_t));
+    if (handle == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, NULL,
+                    "could not malloc space for handle.\n");
+    }
+
+    /* put every member in a known state before anything can fail */
+    handle->magic       = S3COMMS_S3R_MAGIC;
+    handle->purl        = purl;
+    handle->filesize    = 0;
+    handle->region      = NULL;
+    handle->secret_id   = NULL;
+    handle->signing_key = NULL;
+    handle->httpverb    = NULL;
+    handle->curlhandle  = NULL;
+
+    /*************************************
+     * RECORD AUTHENTICATION INFORMATION *
+     *************************************/
+
+    /* A NULL pointer and an empty first byte are both treated as "not
+     * supplied".
+     * NOTE(review): `signing_key` is copied as SHA256_DIGEST_LENGTH raw
+     * bytes below; a key whose first byte is zero would be seen as absent
+     * here -- confirm whether callers can produce one.
+     */
+    if ((region      != NULL && *region      != '\0') ||
+        (id          != NULL && *id          != '\0') ||
+        (signing_key != NULL && *signing_key != '\0'))
+    {
+        /* if one exists, all three must exist
+         */
+        if (region == NULL || region[0] == '\0') {
+            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                        "region cannot be null.\n");
+        }
+        if (id == NULL || id[0] == '\0') {
+            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                        "secret id cannot be null.\n");
+        }
+        if (signing_key == NULL || signing_key[0] == '\0') {
+            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                        "signing key cannot be null.\n");
+        }
+
+        /* copy strings (terminators included)
+         */
+        tmplen = HDstrlen(region) + 1;
+        handle->region = (char *)H5MM_malloc(sizeof(char) * tmplen);
+        if (handle->region == NULL) {
+            HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, NULL,
+                        "could not malloc space for handle region copy.\n");
+        }
+        HDmemcpy(handle->region, region, tmplen);
+
+        tmplen = HDstrlen(id) + 1;
+        handle->secret_id = (char *)H5MM_malloc(sizeof(char) * tmplen);
+        if (handle->secret_id == NULL) {
+            HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, NULL,
+                        "could not malloc space for handle ID copy.\n");
+        }
+        HDmemcpy(handle->secret_id, id, tmplen);
+
+        /* the key is copied as a fixed-size block, not as a string */
+        tmplen = SHA256_DIGEST_LENGTH;
+        handle->signing_key =
+                (unsigned char *)H5MM_malloc(sizeof(unsigned char) * tmplen);
+        if (handle->signing_key == NULL) {
+            HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, NULL,
+                        "could not malloc space for handle key copy.\n");
+        }
+        HDmemcpy(handle->signing_key, signing_key, tmplen);
+    } /* if authentication information provided */
+
+    /************************
+     * INITIATE CURL HANDLE *
+     ************************/
+
+    curlh = curl_easy_init();
+
+    if (curlh == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                    "problem creating curl easy handle!\n");
+    }
+
+    if ( CURLE_OK !=
+         curl_easy_setopt(curlh,
+                          CURLOPT_HTTPGET,
+                          1L) )
+    {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                    "error while setting CURL option (CURLOPT_HTTPGET). "
+                    "(placeholder flags)");
+    }
+
+    /* the option is read back as a `long`, so the enum constant is cast */
+    if ( CURLE_OK !=
+         curl_easy_setopt(curlh,
+                          CURLOPT_HTTP_VERSION,
+                          (long)CURL_HTTP_VERSION_1_1) )
+    {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                    "error while setting CURL option (CURLOPT_HTTP_VERSION). "
+                    "(placeholder flags)");
+    }
+
+    if ( CURLE_OK !=
+         curl_easy_setopt(curlh,
+                          CURLOPT_FAILONERROR,
+                          1L) )
+    {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                    "error while setting CURL option (CURLOPT_FAILONERROR). "
+                    "(placeholder flags)");
+    }
+
+    if ( CURLE_OK !=
+         curl_easy_setopt(curlh,
+                          CURLOPT_WRITEFUNCTION,
+                          curlwritecallback) )
+    {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                    "error while setting CURL option (CURLOPT_WRITEFUNCTION). "
+                    "(placeholder flags)");
+    }
+
+    if ( CURLE_OK !=
+         curl_easy_setopt(curlh,
+                          CURLOPT_URL,
+                          url) )
+    {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                    "error while setting CURL option (CURLOPT_URL). "
+                    "(placeholder flags)");
+    }
+
+#if S3COMMS_CURL_VERBOSITY > 1
+    /* CURL will print (to stdout) information for each operation
+     */
+    curl_easy_setopt(curlh, CURLOPT_VERBOSE, 1L);
+#endif
+
+    handle->curlhandle = curlh;
+
+    /*******************
+     * OPEN CONNECTION *
+     * * * * * * * * * *
+     *  GET FILE SIZE  *
+     *******************/
+
+    if (FAIL ==
+        H5FD_s3comms_s3r_getsize(handle) )
+    {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                    "problem in H5FD_s3comms_s3r_getsize.\n");
+    }
+
+    /*********************
+     * FINAL PREPARATION *
+     *********************/
+
+    /* getsize() allocated the verb buffer; switch it over for reads */
+    HDassert( handle->httpverb != NULL );
+    HDmemcpy(handle->httpverb, "GET", 4);
+
+    ret_value = handle;
+#endif /* H5_HAVE_ROS3_VFD */
+
+done:
+    if (ret_value == NULL) {
+#ifdef H5_HAVE_ROS3_VFD
+        /* nothing is handed to the caller on failure: undo every step */
+        if (curlh != NULL) {
+            curl_easy_cleanup(curlh);
+        }
+        if (FAIL == H5FD_s3comms_free_purl(purl)) {
+            HDONE_ERROR(H5E_ARGS, H5E_BADVALUE, NULL,
+                        "unable to free parsed url structure")
+        }
+        if (handle != NULL) {
+            H5MM_xfree(handle->region);
+            H5MM_xfree(handle->secret_id);
+            H5MM_xfree(handle->signing_key);
+            if (handle->httpverb != NULL) {
+                H5MM_xfree(handle->httpverb);
+            }
+            handle->magic += 1ul; /* invalidate before release */
+            H5MM_xfree(handle);
+        }
+#endif /* H5_HAVE_ROS3_VFD */
+    }
+
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_s3comms_s3r_open */
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: H5FD_s3comms_s3r_read()
+ *
+ * Purpose:
+ *
+ * Read file pointed to by request handle, writing specified
+ * `offset` .. `offset + len` bytes to buffer `dest`.
+ *
+ * If `len` is 0, reads entirety of file starting at `offset`.
+ * If `offset` and `len` are both 0, reads entire file.
+ *
+ * If `offset` or `offset+len` is greater than the file size, read is
+ * aborted and returns `FAIL`.
+ *
+ * Uses configured "curl easy handle" to perform request.
+ *
+ * In event of error, buffer should remain unaltered.
+ *
+ * If handle is set to authorize a request, creates a new (temporary)
+ * HTTP Request object (hrb_t) for generating requisite headers,
+ * which is then translated to a `curl slist` and set in the curl handle
+ * for the request.
+ *
+ * `dest` _may_ be NULL, but no body data will be recorded.
+ *
+ * - In general practice, NULL should never be passed in as `dest`.
+ * - NULL `dest` passed in by internal function `s3r_getsize()`, in
+ * conjunction with CURLOPT_NOBODY to preempt transmission of file data
+ * from server.
+ *
+ * Return:
+ *
+ * - SUCCESS: `SUCCEED`
+ * - FAILURE: `FAIL`
+ *
+ * Programmer: Jacob Smith
+ * 2017-08-22
+ *
+ * Changes:
+ *
+ * - Revise structure to prevent unnecessary hrb_t element creation.
+ * - Rename tmprstr -> rangebytesstr to reflect purpose.
+ * - Insert needed `free()`s, particularly for `sds`.
+ * --- Jacob Smith 2017-08-23
+ *
+ * - Revise heavily to accept buffer, range as parameters.
+ * - Utilize modified s3r_t format.
+ * --- Jacob Smith 2017-08-31
+ *
+ * - Incorporate into HDF library.
+ * - Rename from `s3r_read()` to `H5FD_s3comms_s3r_read()`.
+ * - Return `herr_t` succeed/fail instead of S3code.
+ * - Update to use revised `hrb_t` and `hrb_node_t` structures.
+ * --- Jacob Smith 2017-10-06
+ *
+ * - Update to use `parsed_url_t *purl` in handle.
+ * --- Jacob Smith 2017-11-01
+ *
+ * - Better define behavior upon read past EOF
+ * --- Jacob Smith 2017-01-19
+ *
+ *----------------------------------------------------------------------------
+ */
+herr_t
+H5FD_s3comms_s3r_read(s3r_t *handle,
+ haddr_t offset,
+ size_t len,
+ void *dest)
+{
+#ifdef H5_HAVE_ROS3_VFD
+ CURL *curlh = NULL;
+ CURLcode p_status = CURLE_OK;
+ struct curl_slist *curlheaders = NULL;
+ hrb_node_t *headers = NULL;
+ hrb_node_t *node = NULL;
+ struct tm *now = NULL;
+ char *rangebytesstr = NULL;
+ hrb_t *request = NULL;
+ int ret = 0; /* working variable to check */
+ /* return value of HDsnprintf */
+ struct s3r_datastruct *sds = NULL;
+ herr_t ret_value = SUCCEED;
+#else
+ herr_t ret_value = FAIL;
+#endif /* H5_HAVE_ROS3_VFD */
+
+
+
+ FUNC_ENTER_NOAPI_NOINIT
+
+#ifdef H5_HAVE_ROS3_VFD
+
+#if S3COMMS_DEBUG
+ HDfprintf(stdout, "called H5FD_s3comms_s3r_read.\n");
+#endif
+
+ /**************************************
+ * ABSOLUTELY NECESSARY SANITY-CHECKS *
+ **************************************/
+
+ if (handle == NULL) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "handle cannot be null.\n");
+ }
+ if (handle->magic != S3COMMS_S3R_MAGIC) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "handle has invalid magic.\n");
+ }
+ if (handle->curlhandle == NULL) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "handle has bad (null) curlhandle.\n")
+ }
+ if (handle->purl == NULL) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "handle has bad (null) url.\n")
+ }
+ HDassert( handle->purl->magic == S3COMMS_PARSED_URL_MAGIC );
+ if (offset > handle->filesize || (len + offset) > handle->filesize) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "unable to read past EoF")
+ }
+
+ curlh = handle->curlhandle;
+
+ /*********************
+ * PREPARE WRITEDATA *
+ *********************/
+
+ if (dest != NULL) {
+ sds = (struct s3r_datastruct *)H5MM_malloc(
+ sizeof(struct s3r_datastruct));
+ if (sds == NULL) {
+ HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL,
+ "could not malloc destination datastructure.\n");
+ }
+
+ sds->magic = S3COMMS_CALLBACK_DATASTRUCT_MAGIC;
+ sds->data = (char *)dest;
+ sds->size = 0;
+ if (CURLE_OK !=
+ curl_easy_setopt(curlh,
+ CURLOPT_WRITEDATA,
+ sds) )
+ {
+ HGOTO_ERROR(H5E_ARGS, H5E_UNINITIALIZED, FAIL,
+ "error while setting CURL option (CURLOPT_WRITEDATA). "
+ "(placeholder flags)");
+ }
+ }
+
+ /*********************
+ * FORMAT HTTP RANGE *
+ *********************/
+
+ if (len > 0) {
+ rangebytesstr = (char *)H5MM_malloc(sizeof(char) * \
+ S3COMMS_MAX_RANGE_STRING_SIZE );
+ if (rangebytesstr == NULL) {
+ HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL,
+ "could not malloc range format string.\n");
+ }
+ ret = HDsnprintf(rangebytesstr,
+ (S3COMMS_MAX_RANGE_STRING_SIZE),
+ "bytes="H5_PRINTF_HADDR_FMT"-"H5_PRINTF_HADDR_FMT,
+ offset,
+ offset + len - 1);
+ if (ret == 0 || ret >= S3COMMS_MAX_RANGE_STRING_SIZE)
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "unable to format HTTP Range value");
+ } else if (offset > 0) {
+ rangebytesstr = (char *)H5MM_malloc(sizeof(char) * \
+ S3COMMS_MAX_RANGE_STRING_SIZE);
+ if (rangebytesstr == NULL) {
+ HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL,
+ "could not malloc range format string.\n");
+ }
+ ret = HDsnprintf(rangebytesstr,
+ (S3COMMS_MAX_RANGE_STRING_SIZE),
+ "bytes="H5_PRINTF_HADDR_FMT"-",
+ offset);
+ if (ret == 0 || ret >= S3COMMS_MAX_RANGE_STRING_SIZE)
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "unable to format HTTP Range value");
+ }
+
+ /*******************
+ * COMPILE REQUEST *
+ *******************/
+
+ if (handle->signing_key == NULL) {
+ /* Do not authenticate.
+ */
+ if (rangebytesstr != NULL) {
+ /* Pass in range directly
+ */
+ char *bytesrange_ptr = NULL; /* pointer past "bytes=" portion */
+
+ bytesrange_ptr = strchr(rangebytesstr, '=');
+ HDassert( bytesrange_ptr != NULL );
+ bytesrange_ptr++; /* move to first char past '=' */
+ HDassert( *bytesrange_ptr != '\0' );
+
+ if (CURLE_OK !=
+ curl_easy_setopt(curlh,
+ CURLOPT_RANGE,
+ bytesrange_ptr) )
+ {
+ HGOTO_ERROR(H5E_VFL, H5E_UNINITIALIZED, FAIL,
+ "error while setting CURL option (CURLOPT_RANGE). ");
+ }
+ }
+ } else {
+ /* authenticate request
+ */
+ char authorization[512];
+ /* 512 := approximate max length...
+ * 67 <len("AWS4-HMAC-SHA256 Credential=///s3/aws4_request,"
+ * "SignedHeaders=,Signature=")>
+ * + 8 <yyyyMMDD>
+ * + 64 <hex(sha256())>
+ * + 128 <max? len(secret_id)>
+ * + 20 <max? len(region)>
+ * + 128 <max? len(signed_headers)>
+ */
+ char buffer1[512]; /* -> Canonical Request -> Signature */
+ char buffer2[256]; /* -> String To Sign -> Credential */
+ char iso8601now[ISO8601_SIZE];
+ char signed_headers[48];
+ /* should be large enough for nominal listing:
+ * "host;range;x-amz-content-sha256;x-amz-date"
+ * + '\0', with "range;" possibly absent
+ */
+
+ /* zero start of strings */
+ authorization[0] = 0;
+ buffer1[0] = 0;
+ buffer2[0] = 0;
+ iso8601now[0] = 0;
+ signed_headers[0] = 0;
+
+ /**** VERIFY INFORMATION EXISTS ****/
+
+ if (handle->region == NULL) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "handle must have non-null region.\n");
+ }
+ if (handle->secret_id == NULL) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "handle must have non-null secret_id.\n");
+ }
+ if (handle->signing_key == NULL) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "handle must have non-null signing_key.\n");
+ }
+ if (handle->httpverb == NULL) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "handle must have non-null httpverb.\n");
+ }
+ if (handle->purl->host == NULL) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "handle must have non-null host.\n");
+ }
+ if (handle->purl->path == NULL) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "handle must have non-null resource.\n");
+ }
+
+ /**** CREATE HTTP REQUEST STRUCTURE (hrb_t) ****/
+
+ request = H5FD_s3comms_hrb_init_request(
+ (const char *)handle->httpverb,
+ (const char *)handle->purl->path,
+ "HTTP/1.1");
+ if (request == NULL) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "could not allocate hrb_t request.\n");
+ }
+ HDassert( request->magic == S3COMMS_HRB_MAGIC );
+
+ now = gmnow();
+ if (ISO8601NOW(iso8601now, now) != (ISO8601_SIZE - 1)) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "could not format ISO8601 time.\n");
+ }
+
+ if (FAIL ==
+ H5FD_s3comms_hrb_node_set(
+ &headers,
+ "x-amz-date",
+ (const char *)iso8601now) )
+ {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "unable to set x-amz-date header")
+ }
+ if (headers == NULL) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "problem building headers list. "
+ "(placeholder flags)\n");
+ }
+ HDassert( headers->magic == S3COMMS_HRB_NODE_MAGIC );
+
+ if (FAIL ==
+ H5FD_s3comms_hrb_node_set(
+ &headers,
+ "x-amz-content-sha256",
+ (const char *)EMPTY_SHA256) )
+ {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "unable to set x-amz-content-sha256 header")
+ }
+ if (headers == NULL) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "problem building headers list. "
+ "(placeholder flags)\n");
+ }
+ HDassert( headers->magic == S3COMMS_HRB_NODE_MAGIC );
+
+ if (rangebytesstr != NULL) {
+ if (FAIL ==
+ H5FD_s3comms_hrb_node_set(
+ &headers,
+ "Range",
+ (const char *)rangebytesstr) )
+ {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "unable to set range header")
+ }
+ if (headers == NULL) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "problem building headers list. "
+ "(placeholder flags)\n");
+ }
+ HDassert( headers->magic == S3COMMS_HRB_NODE_MAGIC );
+ }
+
+ if (FAIL ==
+ H5FD_s3comms_hrb_node_set(
+ &headers,
+ "Host",
+ (const char *)handle->purl->host) )
+ {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "unable to set host header")
+ }
+ if (headers == NULL) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "problem building headers list. "
+ "(placeholder flags)\n");
+ }
+ HDassert( headers->magic == S3COMMS_HRB_NODE_MAGIC );
+
+ request->first_header = headers;
+
+ /**** COMPUTE AUTHORIZATION ****/
+
+ if (FAIL == /* buffer1 -> canonical request */
+ H5FD_s3comms_aws_canonical_request(buffer1,
+ signed_headers,
+ request) )
+ {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "(placeholder flags)\n");
+ }
+ if ( FAIL == /* buffer2->string-to-sign */
+ H5FD_s3comms_tostringtosign(buffer2,
+ buffer1,
+ iso8601now,
+ handle->region) )
+ {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "(placeholder flags)\n");
+ }
+ if (FAIL == /* buffer1 -> signature */
+ H5FD_s3comms_HMAC_SHA256(handle->signing_key,
+ SHA256_DIGEST_LENGTH,
+ buffer2,
+ HDstrlen(buffer2),
+ buffer1) )
+ {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "(placeholder flags)\n");
+ }
+
+ iso8601now[8] = 0; /* trim to yyyyMMDD */
+ ret = S3COMMS_FORMAT_CREDENTIAL(buffer2,
+ handle->secret_id,
+ iso8601now,
+ handle->region,
+ "s3");
+ if (ret == 0 || ret >= S3COMMS_MAX_CREDENTIAL_SIZE)
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "unable to format aws4 credential string");
+
+ ret = HDsnprintf(authorization,
+ 512,
+ "AWS4-HMAC-SHA256 Credential=%s,SignedHeaders=%s,Signature=%s",
+ buffer2,
+ signed_headers,
+ buffer1);
+ if (ret == 0 || ret >= 512)
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "unable to format aws4 authorization string");
+
+ /* append authorization header to http request buffer
+ */
+ if (FAIL ==
+ H5FD_s3comms_hrb_node_set(
+ &headers,
+ "Authorization",
+ (const char *)authorization) )
+ {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "unable to set Authorization header")
+ }
+ if (headers == NULL) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "problem building headers list. "
+ "(placeholder flags)\n");
+ }
+
+ /* update hrb's "first header" pointer
+ */
+ request->first_header = headers;
+
+ /**** SET CURLHANDLE HTTP HEADERS FROM GENERATED DATA ****/
+
+ node = request->first_header;
+ while (node != NULL) {
+ HDassert( node->magic == S3COMMS_HRB_NODE_MAGIC );
+ curlheaders = curl_slist_append(curlheaders,
+ (const char *)node->cat);
+ if (curlheaders == NULL) {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "could not append header to curl slist. "
+ "(placeholder flags)\n");
+ }
+ node = node->next;
+ }
+
+ /* sanity-check
+ */
+ if (curlheaders == NULL) {
+ /* above loop was probably never run */
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "curlheaders was never populated.\n");
+ }
+
+ /* finally, set http headers in curl handle
+ */
+ if (CURLE_OK !=
+ curl_easy_setopt(curlh,
+ CURLOPT_HTTPHEADER,
+ curlheaders) )
+ {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "error while setting CURL option "
+ "(CURLOPT_HTTPHEADER). (placeholder flags)");
+ }
+
+ } /* if should authenticate (info provided) */
+
+ /*******************
+ * PERFORM REQUEST *
+ *******************/
+
+#if S3COMMS_CURL_VERBOSITY > 0
+ /* In event of error, print detailed information to stderr
+ * This is not the default behavior.
+ */
+ {
+ long int httpcode = 0;
+ char curlerrbuf[CURL_ERROR_SIZE];
+ curlerrbuf[0] = '\0';
+
+ if (CURLE_OK !=
+ curl_easy_setopt(curlh, CURLOPT_ERRORBUFFER, curlerrbuf) )
+ {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "problem setting error buffer")
+ }
+
+ p_status = curl_easy_perform(curlh);
+
+ if (p_status != CURLE_OK) {
+ if (CURLE_OK !=
+ curl_easy_getinfo(curlh, CURLINFO_RESPONSE_CODE, &httpcode) )
+ {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "problem getting response code")
+ }
+ HDfprintf(stderr, "CURL ERROR CODE: %d\nHTTP CODE: %d\n",
+ p_status, httpcode);
+ HDfprintf(stderr, "%s\n", curl_easy_strerror(p_status));
+ HGOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL,
+ "problem while performing request.\n");
+ }
+ if (CURLE_OK !=
+ curl_easy_setopt(curlh, CURLOPT_ERRORBUFFER, NULL) )
+ {
+ HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "problem unsetting error buffer")
+ }
+ } /* verbose error reporting */
+#else
+ p_status = curl_easy_perform(curlh);
+
+ if (p_status != CURLE_OK) {
+ HGOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL,
+ "curl cannot perform request\n")
+ }
+#endif
+
+#if S3COMMS_DEBUG
+ if (dest != NULL) {
+ HDfprintf(stderr, "len: %d\n", (int)len);
+ HDfprintf(stderr, "CHECKING FOR BUFFER OVERFLOW\n");
+ if (sds == NULL) {
+ HDfprintf(stderr, "sds is NULL!\n");
+ } else {
+ HDfprintf(stderr, "sds: 0x%lx\n", (long long)sds);
+ HDfprintf(stderr, "sds->size: %d\n", (int)sds->size);
+ if (len > sds->size) {
+ HDfprintf(stderr, "buffer overwrite\n");
+ }
+ }
+ } else {
+ HDfprintf(stderr, "performed on entire file\n");
+ }
+#endif
+
+done:
+ /* clean any malloc'd resources
+ */
+ if (curlheaders != NULL) {
+ curl_slist_free_all(curlheaders);
+ curlheaders = NULL;
+ }
+ if (rangebytesstr != NULL) {
+ H5MM_xfree(rangebytesstr);
+ rangebytesstr = NULL;
+ }
+ if (sds != NULL) {
+ H5MM_xfree(sds);
+ sds = NULL;
+ }
+ if (request != NULL) {
+ while (headers != NULL)
+ if (FAIL ==
+ H5FD_s3comms_hrb_node_set(&headers, headers->name, NULL))
+ {
+ HDONE_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "cannot release header node")
+ }
+ HDassert( NULL == headers );
+ if (FAIL == H5FD_s3comms_hrb_destroy(&request)) {
+ HDONE_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "cannot release header request structure")
+ }
+ HDassert( NULL == request );
+ }
+
+ if (curlh != NULL) {
+ /* clear any Range */
+ if (CURLE_OK != curl_easy_setopt(curlh, CURLOPT_RANGE, NULL) )
+ HDONE_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "cannot unset CURLOPT_RANGE")
+
+ /* clear headers */
+ if (CURLE_OK != curl_easy_setopt(curlh, CURLOPT_HTTPHEADER, NULL) )
+ HDONE_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+ "cannot unset CURLOPT_HTTPHEADER")
+ }
+
+#endif /* H5_HAVE_ROS3_VFD */
+
+ FUNC_LEAVE_NOAPI(ret_value);
+
+} /* H5FD_s3comms_s3r_read */
+
+
+
+/****************************************************************************
+ * MISCELLANEOUS FUNCTIONS
+ ****************************************************************************/
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: gmnow()
+ *
+ * Purpose:
+ *
+ *     Obtain the current time broken down by `gmtime()`, sparing callers
+ *     the `time_t` scaffolding.
+ *
+ * Return:
+ *
+ *     Pointer to the `struct tm` handed back by `gmtime()`.
+ *     NULL if `time()` reported failure (or `gmtime()` itself returned
+ *     NULL); that case is also flagged by `HDassert()`.
+ *
+ *     NOTE(review): per the C library documentation `gmtime()` may return
+ *     a pointer to shared static storage -- callers should presumably
+ *     neither free the result nor keep it across later time calls.
+ *
+ * Programmer: Jacob Smith
+ *             2017-07-12
+ *
+ * Changes: None.
+ *
+ *----------------------------------------------------------------------------
+ */
+struct tm *
+gmnow(void)
+{
+    struct tm *ret_value = NULL;
+    time_t     current;
+
+    /* time() signals failure with (time_t)(-1); convert only on success */
+    if (time(&current) != (time_t)(-1)) {
+        ret_value = gmtime(&current);
+    }
+
+    HDassert( ret_value != NULL );
+
+    return ret_value;
+
+} /* gmnow */
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: H5FD_s3comms_aws_canonical_request()
+ *
+ * Purpose:
+ *
+ *     Compose AWS "Canonical Request" (and signed headers string)
+ *     as defined in the REST API documentation.
+ *
+ *     Both destination strings are null-terminated.
+ *
+ *     Destination string arguments must be provided with adequate space;
+ *     this function has no way to know their capacity.
+ *
+ *     `canonical_request_dest` is overwritten from its start.
+ *     `signed_headers_dest` is _appended to_, so it must already hold a
+ *     null-terminated (normally empty) string on entry.
+ *
+ *     Headers are emitted in the order of the request's header list; the
+ *     list is assumed to be sorted by name already.
+ *
+ *     Canonical Request format:
+ *
+ *      <HTTP VERB>"\n"
+ *      <resource path>"\n"
+ *      <query string>"\n"
+ *      <header1>"\n" (`lowercase(name)`":"`trim(value)`)
+ *      <header2>"\n"
+ *      ... (headers sorted by name)
+ *      <header_n>"\n"
+ *      "\n"
+ *      <signed headers>"\n" (`lowercase(header 1 name)`";"`header 2 name`;...)
+ *      <hex-string of sha256sum of body> ("e3b0c4429...", e.g.)
+ *
+ * Return:
+ *
+ *     - SUCCESS: `SUCCEED`
+ *         - writes canonical request to respective `...dest` strings
+ *     - FAILURE: `FAIL`
+ *         - one or more input argument was NULL
+ *         - internal error
+ *
+ * Programmer: Jacob Smith
+ *             2017-10-04
+ *
+ * Changes:
+ *
+ *     - Append header lines directly to the destinations instead of staging
+ *       them in a fixed 256-byte scratch buffer that a long header value
+ *       could overflow.
+ *     - Do not index before the start of the signed-headers string when the
+ *       request carries no headers.
+ *
+ *----------------------------------------------------------------------------
+ */
+herr_t
+H5FD_s3comms_aws_canonical_request(char  *canonical_request_dest,
+                                   char  *signed_headers_dest,
+                                   hrb_t *http_request)
+{
+    hrb_node_t *node         = NULL;
+    const char *query_params = ""; /* unused at present */
+    herr_t      ret_value    = SUCCEED;
+    int         ret          = 0; /* return value of HDsnprintf */
+    size_t      len          = 0; /* working string length variable */
+
+    /* "query params" refers to the optional element in the URL, e.g.
+     *     http://bucket.aws.com/myfile.txt?max-keys=2&prefix=J
+     *                                      ^-----------------^
+     *
+     * Not handled/implemented as of 2017-10-xx.
+     * Element introduced as empty placeholder and reminder.
+     * Further research to be done if this is ever relevant for the
+     * VFD use-cases.
+     */
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#if S3COMMS_DEBUG
+    HDfprintf(stdout, "called H5FD_s3comms_aws_canonical_request.\n");
+#endif
+
+    if (http_request == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "hrb object cannot be null.\n");
+    }
+    HDassert( http_request->magic == S3COMMS_HRB_MAGIC );
+
+    if (canonical_request_dest == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "canonical request destination cannot be null.\n");
+    }
+
+    if (signed_headers_dest == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "signed headers destination cannot be null.\n");
+    }
+
+    /* HTTP verb, resource path, and query string lines
+     * (+3 for the three newline characters)
+     */
+    len = (HDstrlen(http_request->verb) +
+           HDstrlen(http_request->resource) +
+           HDstrlen(query_params) +
+           3 );
+    ret = HDsnprintf(canonical_request_dest,
+                     len + 1,
+                     "%s\n%s\n%s\n",
+                     http_request->verb,
+                     http_request->resource,
+                     query_params);
+    if (ret <= 0 || (size_t)ret > len) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "unable to compose canonical request first line");
+    }
+
+    /* write in canonical headers, building signed headers concurrently
+     */
+    node = http_request->first_header; /* assumed at first sorted */
+    while (node != NULL) {
+        HDassert( node->magic == S3COMMS_HRB_NODE_MAGIC );
+
+        /* canonical header line: "<lowername>:<value>\n" */
+        strcat(canonical_request_dest, node->lowername);
+        strcat(canonical_request_dest, ":");
+        strcat(canonical_request_dest, node->value);
+        strcat(canonical_request_dest, "\n");
+
+        /* signed header entry: "<lowername>;" */
+        strcat(signed_headers_dest, node->lowername);
+        strcat(signed_headers_dest, ";");
+
+        node = node->next;
+    }
+
+    /* remove trailing ';' from signed headers sequence
+     * (nothing to remove if no header was written)
+     */
+    len = HDstrlen(signed_headers_dest);
+    if (len > 0) {
+        signed_headers_dest[len - 1] = '\0';
+    }
+
+    /* append signed headers and payload hash
+     * NOTE: at present, no HTTP body is handled, per the nature of
+     *       requests/range-gets
+     */
+    strcat(canonical_request_dest, "\n");
+    strcat(canonical_request_dest, signed_headers_dest);
+    strcat(canonical_request_dest, "\n");
+    strcat(canonical_request_dest, EMPTY_SHA256);
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value);
+
+} /* H5FD_s3comms_aws_canonical_request */
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: H5FD_s3comms_bytes_to_hex()
+ *
+ * Purpose:
+ *
+ *     Produce human-readable hex string [0-9A-F] from sequence of bytes.
+ *
+ *     For each byte (char), writes two-character hexadecimal representation.
+ *
+ *     When `msg_len` is nonzero, a null terminator is written immediately
+ *     after the last hex digit, i.e. at `dest[msg_len * 2]`.
+ *
+ *     Assumes `dest` is allocated to enough size: `msg_len * 2 + 1` bytes
+ *     (hex digits plus the terminator).
+ *
+ *     Fails if either `dest` or `msg` are null.
+ *
+ *     `msg_len` message length of 0 has no effect; `dest` is left untouched.
+ *
+ * Return:
+ *
+ *     - SUCCESS: `SUCCEED`
+ *         - null-terminated hex string written to `dest`
+ *     - FAILURE: `FAIL`
+ *         - `dest == NULL`
+ *         - `msg == NULL`
+ *
+ * Programmer: Jacob Smith
+ *             2017-07-12
+ *
+ * Changes:
+ *
+ *     - Integrate into HDF.
+ *     - Rename from hex() to H5FD_s3comms_bytes_to_hex.
+ *     - Change return type from `void` to `herr_t`.
+ *     --- Jacob Smith 2017-09-14
+ *
+ *     - Add bool parameter `lowercase` to configure upper/lowercase output
+ *       of a-f hex characters.
+ *     --- Jacob Smith 2017-09-19
+ *
+ *     - Change bool type to `hbool_t`
+ *     --- Jacob Smith 2017-10-11
+ *
+ *     - Emit digits from a lookup table rather than one formatted print per
+ *       byte; document the terminator that has always been written.
+ *
+ *----------------------------------------------------------------------------
+ */
+herr_t
+H5FD_s3comms_bytes_to_hex(char                *dest,
+                          const unsigned char *msg,
+                          size_t               msg_len,
+                          hbool_t              lowercase)
+{
+    static const char lower_digits[] = "0123456789abcdef";
+    static const char upper_digits[] = "0123456789ABCDEF";
+    const char       *digits         = NULL;
+    size_t            i              = 0;
+    herr_t            ret_value      = SUCCEED;
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#if S3COMMS_DEBUG
+    HDfprintf(stdout, "called H5FD_s3comms_bytes_to_hex.\n");
+#endif
+
+    if (dest == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "hex destination cannot be null.\n");
+    }
+    if (msg == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "bytes sequence cannot be null.\n");
+    }
+
+    digits = (lowercase == TRUE) ? lower_digits : upper_digits;
+
+    /* high nibble first, then low nibble, for every input byte */
+    for (i = 0; i < msg_len; i++) {
+        dest[i * 2]     = digits[(msg[i] >> 4) & 0x0F];
+        dest[i * 2 + 1] = digits[msg[i] & 0x0F];
+    }
+
+    /* Terminate the string, as the previous per-byte formatted print did.
+     * Code in this file prints the result with "%s", so the terminator is
+     * part of the contract. Nothing is written for an empty message.
+     */
+    if (msg_len > 0) {
+        dest[msg_len * 2] = '\0';
+    }
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value);
+
+} /* H5FD_s3comms_bytes_to_hex */
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: H5FD_s3comms_free_purl()
+ *
+ * Purpose:
+ *
+ *     Release a `parsed_url_t` structure together with each of its
+ *     component strings (scheme, host, port, path, query).
+ *
+ *     A NULL `purl` is accepted and ignored.
+ *
+ *     The structure's magic value is altered just before the structure is
+ *     freed.
+ *
+ * Return:
+ *
+ *     `SUCCEED` (never fails)
+ *
+ * Programmer: Jacob Smith
+ *             2017-11-01
+ *
+ * Changes: None.
+ *
+ *----------------------------------------------------------------------------
+ */
+herr_t
+H5FD_s3comms_free_purl(parsed_url_t *purl)
+{
+    FUNC_ENTER_NOAPI_NOINIT_NOERR
+
+#if S3COMMS_DEBUG
+    HDprintf("called H5FD_s3comms_free_purl.\n");
+#endif
+
+    if (NULL != purl) {
+        char    *components[5];
+        unsigned idx = 0;
+
+        HDassert( purl->magic == S3COMMS_PARSED_URL_MAGIC );
+
+        /* gather the owned strings so they can be released uniformly */
+        components[0] = purl->scheme;
+        components[1] = purl->host;
+        components[2] = purl->port;
+        components[3] = purl->path;
+        components[4] = purl->query;
+
+        for (idx = 0; idx < 5; idx++) {
+            if (NULL != components[idx]) {
+                H5MM_xfree(components[idx]);
+            }
+        }
+
+        /* spoil the magic value before the structure itself goes away */
+        purl->magic += 1ul;
+        H5MM_xfree(purl);
+    }
+
+    FUNC_LEAVE_NOAPI(SUCCEED)
+} /* H5FD_s3comms_free_purl */
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: H5FD_s3comms_HMAC_SHA256()
+ *
+ * Purpose:
+ *
+ *     Generate Hash-based Message Authentication Checksum using the SHA-256
+ *     hashing algorithm.
+ *
+ *     Given a key, message, and respective lengths (to accommodate null
+ *     characters in either), generate _hex string_ of authentication checksum
+ *     and write to `dest`.
+ *
+ *     `dest` must have room for `SHA256_DIGEST_LENGTH * 2` hex characters
+ *     plus one terminating null, which `H5FD_s3comms_bytes_to_hex()` writes
+ *     after the digits. Not enforceable by this function.
+ *
+ *     Without `H5_HAVE_ROS3_VFD` the function does nothing and returns
+ *     `FAIL`.
+ *
+ * Return:
+ *
+ *     - SUCCESS: `SUCCEED`
+ *         - lowercase hex string written to `dest`
+ *     - FAILURE: `FAIL`
+ *         - `dest == NULL`
+ *         - the digest could not be computed
+ *         - error while generating hex string output
+ *
+ * Programmer: Jacob Smith
+ *             2017-07-??
+ *
+ * Changes:
+ *
+ *     - Integrate with HDF5.
+ *     - Rename from `HMAC_SHA256` to `H5FD_s3comms_HMAC_SHA256`.
+ *     - Rename output parameter from `md` to `dest`.
+ *     - Return `herr_t` type instead of `void`.
+ *     - Call `H5FD_s3comms_bytes_to_hex` to generate hex cleartext for output.
+ *     --- Jacob Smith 2017-09-19
+ *
+ *     - Use static char array instead of malloc'ing `md`
+ *     --- Jacob Smith 2017-10-10
+ *
+ *     - Check the result of `HMAC()` instead of hex-encoding a digest buffer
+ *       that may never have been filled in.
+ *
+ *----------------------------------------------------------------------------
+ */
+herr_t
+H5FD_s3comms_HMAC_SHA256(const unsigned char *key,
+                         size_t               key_len,
+                         const char          *msg,
+                         size_t               msg_len,
+                         char                *dest)
+{
+#ifdef H5_HAVE_ROS3_VFD
+    unsigned char md[SHA256_DIGEST_LENGTH];
+    unsigned int  md_len    = SHA256_DIGEST_LENGTH;
+    herr_t        ret_value = SUCCEED;
+#else
+    herr_t        ret_value = FAIL;
+#endif /* H5_HAVE_ROS3_VFD */
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#ifdef H5_HAVE_ROS3_VFD
+
+#if S3COMMS_DEBUG
+    HDfprintf(stdout, "called H5FD_s3comms_HMAC_SHA256.\n");
+#endif
+
+    if (dest == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "destination cannot be null.");
+    }
+
+    /* HMAC() returns NULL on failure; `md` is then not a valid digest */
+    if (NULL == HMAC(EVP_sha256(),
+                     key,
+                     (int)key_len,
+                     (const unsigned char *)msg,
+                     msg_len,
+                     md,
+                     &md_len))
+    {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "could not compute HMAC-SHA256 digest.");
+    }
+
+    /* render the raw digest as lowercase hex */
+    if (FAIL ==
+        H5FD_s3comms_bytes_to_hex(dest,
+                                  (const unsigned char *)md,
+                                  (size_t)md_len,
+                                  TRUE))
+    {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "could not convert to hex string.");
+    }
+
+#endif /* H5_HAVE_ROS3_VFD */
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value);
+
+} /* H5FD_s3comms_HMAC_SHA256 */
+
+
+/*-----------------------------------------------------------------------------
+ *
+ * Function: H5FD__s3comms_load_aws_creds_from_file()
+ *
+ * Purpose:
+ *
+ *     Extract AWS configuration information from a target file.
+ *
+ *     Given a file and a profile name, e.g. "ros3_vfd_test", attempt to locate
+ *     the line starting with "[ros3_vfd_test]" in the file. If no such line
+ *     exists, the output pointers are not modified and the call still
+ *     succeeds.
+ *
+ *     If the profile label is found, attempts to locate and parse configuration
+ *     data, stopping at the first line where:
+ *     + reached end of file
+ *     + line does not start with a recognized setting name
+ *
+ *     Following AWS documentation, looks for any of:
+ *     + aws_access_key_id
+ *     + aws_secret_access_key
+ *     + region
+ *
+ *     To be recognized, the setting must begin the line with one of the
+ *     keywords, followed immediately by an equals sign '='.
+ *     + `spam=eggs` ends parsing because name is unrecognized
+ *     + `region = us-east-2` ends parsing because of the space before '='
+ *
+ *     The stored value is the text after '=' up to (not including) the first
+ *     whitespace character or the end of the line.
+ *
+ *     Upon successful parsing of a setting line, will store the result in the
+ *     corresponding output pointer. If the output pointer is NULL, will skip
+ *     any matching setting line while parsing -- useful to prevent overwrite
+ *     when reading from multiple files.
+ *
+ *     Output buffers must be large enough for the stored value; their size is
+ *     not known to this function (a line holds at most 127 characters).
+ *
+ * Return:
+ *
+ *     + SUCCESS: `SUCCEED`
+ *         + no error. settings may or may not have been loaded.
+ *     + FAILURE: `FAIL`
+ *         + `file` or `profile_name` is NULL
+ *         + unable to format profile label (name too long)
+ *         + a recognized setting line ends right after the '='
+ *
+ * Programmer: Jacob Smith
+ *             2018-02-27
+ *
+ * Changes:
+ *
+ *     - Stop at the first unrecognized line after the profile label, as
+ *       documented, instead of reading on into following profiles once any
+ *       setting had been seen.
+ *     - Always read lines into the start of the line buffer; previously the
+ *       read pointer was advanced while parsing and then reused as the
+ *       `fgets` target, allowing writes past the end of the buffer.
+ *     - Stop whitespace trimming at the end of the string.
+ *     - Detect truncation of the profile label correctly.
+ *
+ *-----------------------------------------------------------------------------
+ */
+static herr_t
+H5FD__s3comms_load_aws_creds_from_file(
+        FILE       *file,
+        const char *profile_name,
+        char       *key_id,
+        char       *access_key,
+        char       *aws_region)
+{
+    char        profile_line[32];
+    char        buffer[128];
+    const char *setting_names[] = {
+        "region",
+        "aws_access_key_id",
+        "aws_secret_access_key",
+    };
+    char * const setting_pointers[] = {
+        aws_region,
+        key_id,
+        access_key,
+    };
+    unsigned    setting_count    = 3;
+    herr_t      ret_value        = SUCCEED;
+    unsigned    setting_i        = 0;
+    int         found_setting    = 0;
+    int         ret              = 0; /* return value of HDsnprintf */
+    size_t      profile_line_len = 0;
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#if S3COMMS_DEBUG
+    HDfprintf(stdout, "called load_aws_creds_from_file.\n");
+#endif
+
+    if (file == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "file cannot be null");
+    }
+    if (profile_name == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "profile name cannot be null");
+    }
+
+    /* format target line for start of profile;
+     * a return >= the buffer size means the label was truncated
+     */
+    ret = HDsnprintf(profile_line, sizeof(profile_line), "[%s]", profile_name);
+    if (ret < 0 || (size_t)ret >= sizeof(profile_line)) {
+        HGOTO_ERROR(H5E_ARGS, H5E_CANTCOPY, FAIL,
+                    "unable to format profile label");
+    }
+    profile_line_len = (size_t)ret;
+
+    /* look for start of profile */
+    do {
+        if (fgets(buffer, (int)sizeof(buffer), file) == NULL) {
+            goto done; /* reached end of file: profile not present */
+        }
+    } while (strncmp(buffer, profile_line, profile_line_len));
+
+    /* extract credentials from the lines following the profile label */
+    do {
+        /* a line only keeps the loop going if it is a recognized setting */
+        found_setting = 0;
+
+        /* collect a line from file, always at the start of the buffer */
+        if (fgets(buffer, (int)sizeof(buffer), file) == NULL) {
+            goto done; /* end of file */
+        }
+
+        /* loop over names to see if line looks like assignment */
+        for (setting_i = 0; setting_i < setting_count; setting_i++) {
+            const char *setting_name     = setting_names[setting_i];
+            size_t      setting_name_len = HDstrlen(setting_name);
+            char       *destination      = setting_pointers[setting_i];
+            const char *value            = NULL;
+            size_t      value_len        = 0;
+
+            /* line must read "<name>=" from its very first character */
+            if (strncmp(buffer, setting_name, setting_name_len) != 0 ||
+                buffer[setting_name_len] != '=')
+            {
+                continue;
+            }
+
+            found_setting = 1;
+
+            /* skip NULL destination buffer */
+            if (destination == NULL) {
+                break;
+            }
+
+            /* value begins right after the '=' */
+            value = buffer + setting_name_len + 1;
+            if (*value == '\0') {
+                HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                            "incomplete assignment in file");
+            }
+
+            /* value runs up to the first whitespace (typically the
+             * newline kept by fgets) or the end of the string
+             */
+            while (value[value_len] != '\0' &&
+                   !isspace((unsigned char)value[value_len]))
+            {
+                value_len++;
+            }
+
+            /* copy value into out pointer and terminate it */
+            HDmemcpy(destination, value, value_len);
+            destination[value_len] = '\0';
+
+            break; /* have read setting; don't compare with others */
+        }
+    } while (found_setting);
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value);
+
+} /* H5FD__s3comms_load_aws_creds_from_file */
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: H5FD_s3comms_load_aws_profile()
+ *
+ * Purpose :
+ *
+ *     Read aws profile elements from standard location on system and store
+ *     settings in memory.
+ *
+ *     Looks for both `$HOME/.aws/credentials` and `$HOME/.aws/config`, the
+ *     standard files for AWS tools. If a file exists (can be opened), looks
+ *     for the given profile name and reads the settings into the relevant
+ *     buffer.
+ *
+ *     `credentials` is read first; `config` is then consulted only for the
+ *     settings whose output buffer is still an empty string. Hence any
+ *     setting duplicated in both files will be set to that from
+ *     `credentials`.
+ *
+ *     Settings are stored in the supplied buffers as null-terminated strings.
+ *     The buffers are inspected (first character) to decide what is still
+ *     missing, so callers are expected to pass them in as empty strings.
+ *
+ * Return:
+ *
+ *     + SUCCESS: `SUCCEED` (0)
+ *         + no error occurred and all settings were populated
+ *     + FAILURE: `FAIL` (-1)
+ *         + an output pointer is NULL
+ *         + `HOME` is not set, or a path does not fit its buffer
+ *         + internal error occurred
+ *         + unable to locate profile
+ *         + region, key id, and secret key were not all found and set
+ *
+ * Programmer: Jacob Smith
+ *             2018-02-27
+ *
+ * Changes:
+ *
+ *     - Fail cleanly when `HOME` is unset instead of passing NULL to "%s".
+ *     - Detect path truncation correctly.
+ *     - Never close the same stream twice when `fclose` reports an error.
+ *     - Reject NULL output pointers.
+ *
+ *----------------------------------------------------------------------------
+ */
+herr_t
+H5FD_s3comms_load_aws_profile(const char *profile_name,
+                              char       *key_id_out,
+                              char       *secret_access_key_out,
+                              char       *aws_region_out)
+{
+    herr_t      ret_value = SUCCEED;
+    FILE       *credfile  = NULL;
+    const char *home      = NULL;
+    char        awspath[117];
+    char        filepath[128];
+    int         ret       = 0; /* return value of HDsnprintf / fclose */
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#if S3COMMS_DEBUG
+    HDfprintf(stdout, "called H5FD_s3comms_load_aws_profile.\n");
+#endif
+
+    if (key_id_out == NULL ||
+        secret_access_key_out == NULL ||
+        aws_region_out == NULL)
+    {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "output buffers cannot be null");
+    }
+
+    /* TODO: Windows and other path gotchas */
+    home = getenv("HOME");
+    if (home == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "HOME is not set; unable to locate aws files");
+    }
+
+    /* a return >= the buffer size means the path was truncated */
+    ret = HDsnprintf(awspath, sizeof(awspath), "%s/.aws/", home);
+    if (ret < 0 || (size_t)ret >= sizeof(awspath)) {
+        HGOTO_ERROR(H5E_ARGS, H5E_CANTCOPY, FAIL,
+                    "unable to format home-aws path");
+    }
+    ret = HDsnprintf(filepath, sizeof(filepath), "%s%s", awspath, "credentials");
+    if (ret < 0 || (size_t)ret >= sizeof(filepath)) {
+        HGOTO_ERROR(H5E_ARGS, H5E_CANTCOPY, FAIL,
+                    "unable to format credentials path");
+    }
+
+    /* first pass: credentials file may set any of the three values */
+    credfile = fopen(filepath, "r");
+    if (credfile != NULL) {
+        if (FAIL == H5FD__s3comms_load_aws_creds_from_file(
+                credfile,
+                profile_name,
+                key_id_out,
+                secret_access_key_out,
+                aws_region_out))
+        {
+            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                        "unable to load from aws credentials");
+        }
+        /* forget the stream before acting on the result so that the
+         * cleanup code cannot close it a second time
+         */
+        ret      = fclose(credfile);
+        credfile = NULL;
+        if (EOF == ret) {
+            HGOTO_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL,
+                        "unable to close credentials file");
+        }
+    }
+
+    ret = HDsnprintf(filepath, sizeof(filepath), "%s%s", awspath, "config");
+    if (ret < 0 || (size_t)ret >= sizeof(filepath)) {
+        HGOTO_ERROR(H5E_ARGS, H5E_CANTCOPY, FAIL,
+                    "unable to format config path");
+    }
+
+    /* second pass: config file fills in only what is still empty */
+    credfile = fopen(filepath, "r");
+    if (credfile != NULL) {
+        if (FAIL == H5FD__s3comms_load_aws_creds_from_file(
+                credfile,
+                profile_name,
+                (*key_id_out == 0) ? key_id_out : NULL,
+                (*secret_access_key_out == 0) ? secret_access_key_out : NULL,
+                (*aws_region_out == 0) ? aws_region_out : NULL))
+        {
+            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                        "unable to load from aws config");
+        }
+        ret      = fclose(credfile);
+        credfile = NULL;
+        if (EOF == ret) {
+            HGOTO_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL,
+                        "unable to close config file");
+        }
+    }
+
+    /* fail if not all three settings were loaded */
+    if (*key_id_out == 0 ||
+        *secret_access_key_out == 0 ||
+        *aws_region_out == 0)
+    {
+        ret_value = FAIL;
+    }
+
+done:
+    /* only reached with an open stream when a load call failed */
+    if (credfile != NULL) {
+        if (EOF == fclose(credfile)) {
+            HDONE_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL,
+                        "problem error-closing aws configuration file");
+        }
+    }
+
+    FUNC_LEAVE_NOAPI(ret_value);
+
+} /* H5FD_s3comms_load_aws_profile */
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: H5FD_s3comms_nlowercase()
+ *
+ * Purpose:
+ *
+ *     From string starting at `s`, write `len` characters to `dest`,
+ *     converting all to lowercase.
+ *
+ *     Exactly `len` bytes are written; no null terminator is appended.
+ *
+ *     Behavior is undefined if `len` overruns the allocated space of either
+ *     `s` or `dest`.
+ *
+ *     A `len` of 0 writes nothing and succeeds.
+ *
+ *     Provided as convenience.
+ *
+ * Return:
+ *
+ *     - SUCCESS: `SUCCEED`
+ *         - upon completion, `dest` is populated
+ *     - FAILURE: `FAIL`
+ *         - `dest == NULL`
+ *         - `s == NULL` while `len > 0`
+ *
+ * Programmer: Jacob Smith
+ *             2017-09-18
+ *
+ * Changes:
+ *
+ *     - Pass characters to `tolower()` as `unsigned char`; a negative plain
+ *       `char` is outside the domain of the <ctype.h> functions.
+ *     - Reject a NULL source when there is something to copy.
+ *
+ *----------------------------------------------------------------------------
+ */
+herr_t
+H5FD_s3comms_nlowercase(char       *dest,
+                        const char *s,
+                        size_t      len)
+{
+    size_t i         = 0;
+    herr_t ret_value = SUCCEED;
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#if S3COMMS_DEBUG
+    HDfprintf(stdout, "called H5FD_s3comms_nlowercase.\n");
+#endif
+
+    if (dest == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "destination cannot be null.\n");
+    }
+
+    if (len > 0) {
+        if (s == NULL) {
+            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                        "source cannot be null.\n");
+        }
+
+        /* copy first, then lowercase in place */
+        HDmemcpy(dest, s, len);
+        for (i = 0; i < len; i++) {
+            dest[i] = (char)tolower( (unsigned char)dest[i] );
+        }
+    }
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value);
+
+} /* H5FD_s3comms_nlowercase */
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: H5FD_s3comms_parse_url()
+ *
+ * Purpose:
+ *
+ *     Parse URL-like string and stuff URL components into
+ *     `parsed_url` structure, if possible.
+ *
+ *     Expects null-terminated string of format:
+ *     SCHEME "://" HOST [":" PORT ] ["/" [ PATH ] ] ["?" QUERY]
+ *     where SCHEME :: "[a-zA-Z+.-]+"
+ *           HOST   :: either "[" ... "]" (IPv6 literal, brackets kept) or
+ *                     any non-empty run of characters up to the first
+ *                     ':', '/' or '?'
+ *           PORT   :: "[0-9]+"
+ *
+ *     Stores resulting structure in argument pointer `_purl`, if successful,
+ *     creating and populating new `parsed_url_t` structure pointer.
+ *     The scheme is stored lowercased; the other components are stored
+ *     verbatim.
+ *     Absent elements, and an empty PATH ("/" with nothing after it), are
+ *     NULL in the new structure. An empty PORT (":" with no digits) or an
+ *     empty QUERY ("?" with nothing after it) is an error.
+ *
+ *     The new structure is allocated here; it can be released with
+ *     `H5FD_s3comms_free_purl()`.
+ *
+ * Return:
+ *
+ *     - SUCCESS: `SUCCEED`
+ *         - `*_purl` points to the new structure
+ *     - FAILURE: `FAIL`
+ *         - `str` is NULL or empty, or `_purl` is NULL
+ *         - unable to parse
+ *         - unable to allocate memory
+ *         - `*_purl` is unaltered
+ *
+ * Programmer: Jacob Smith
+ *             2017-10-30
+ *
+ * Changes:
+ *
+ *     - Verify that "://" actually follows the scheme before skipping it;
+ *       previously three characters were skipped blindly, which could step
+ *       past the end of a short string such as "http:".
+ *     - Detect a missing ']' in an IPv6 host by testing the character, not
+ *       the pointer, so the scan cannot run off the end of the string.
+ *     - Pass characters to <ctype.h> functions as `unsigned char`.
+ *     - Reject a NULL `_purl`.
+ *
+ *----------------------------------------------------------------------------
+ */
+herr_t
+H5FD_s3comms_parse_url(const char    *str,
+                       parsed_url_t **_purl)
+{
+    parsed_url_t *purl      = NULL; /* pointer to new structure */
+    const char   *tmpstr    = NULL; /* working pointer in string */
+    const char   *curstr    = str;  /* "start" pointer in string */
+    long int      len       = 0;    /* substring length */
+    long int      urllen    = 0;    /* length of passed-in url string */
+    long int      i         = 0;
+    herr_t        ret_value = FAIL;
+
+    FUNC_ENTER_NOAPI_NOINIT;
+
+#if S3COMMS_DEBUG
+    HDprintf("called H5FD_s3comms_parse_url.\n");
+#endif
+
+    if (str == NULL || *str == '\0') {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "invalid url string");
+    }
+    if (_purl == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "parsed url destination cannot be null");
+    }
+
+    urllen = (long int)HDstrlen(str);
+
+    purl = (parsed_url_t *)H5MM_malloc(sizeof(parsed_url_t));
+    if (purl == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL,
+                    "can't allocate space for parsed_url_t");
+    }
+    /* start from an all-NULL structure so that cleanup on any later
+     * failure only frees what was actually allocated
+     */
+    purl->magic  = S3COMMS_PARSED_URL_MAGIC;
+    purl->scheme = NULL;
+    purl->host   = NULL;
+    purl->port   = NULL;
+    purl->path   = NULL;
+    purl->query  = NULL;
+
+    /***************
+     * READ SCHEME *
+     ***************/
+
+    tmpstr = strchr(curstr, ':');
+    if (tmpstr == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "invalid SCHEME construction: probably not URL");
+    }
+    len = tmpstr - curstr;
+    HDassert( (0 <= len) && (len < urllen) );
+
+    /* check for restrictions
+     */
+    for (i = 0; i < len; i++) {
+        /* scheme = [a-zA-Z+-.]+ (terminated by ":") */
+        if (!isalpha((unsigned char)curstr[i]) &&
+             '+' != curstr[i] &&
+             '-' != curstr[i] &&
+             '.' != curstr[i])
+        {
+            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                        "invalid SCHEME construction");
+        }
+    }
+    /* copy lowercased scheme to structure
+     */
+    purl->scheme = (char *)H5MM_malloc(sizeof(char) * (size_t)(len + 1));
+    if (purl->scheme == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL,
+                    "can't allocate space for SCHEME");
+    }
+    (void)HDstrncpy(purl->scheme, curstr, (size_t)len);
+    purl->scheme[len] = '\0';
+    for (i = 0; i < len; i++) {
+        purl->scheme[i] = (char)tolower((unsigned char)purl->scheme[i]);
+    }
+
+    /* Skip "://", but only once we know all three characters are there;
+     * strncmp stops at the string terminator, so this cannot overrun
+     */
+    if (0 != strncmp(tmpstr, "://", 3)) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "invalid SCHEME construction: missing \"://\"");
+    }
+    tmpstr += 3;
+    curstr = tmpstr;
+
+    /*************
+     * READ HOST *
+     *************/
+
+    if (*curstr == '[') {
+        /* IPv6: host runs through the closing bracket */
+        while (']' != *tmpstr) {
+            if ('\0' == *tmpstr) { /* end of string reached! */
+                HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                            "reached end of URL: incomplete IPv6 HOST");
+            }
+            tmpstr++;
+        }
+        tmpstr++; /* include ']' in the host */
+    } else {
+        while (0 != *tmpstr) {
+            if (':' == *tmpstr ||
+                '/' == *tmpstr ||
+                '?' == *tmpstr)
+            {
+                break;
+            }
+            tmpstr++;
+        }
+    } /* if IPv4 or IPv6 */
+    len = tmpstr - curstr;
+    if (len == 0) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "HOST substring cannot be empty");
+    } else if (len > urllen) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "problem with length of HOST substring");
+    }
+
+    /* copy host
+     */
+    purl->host = (char *)H5MM_malloc(sizeof(char) * (size_t)(len + 1));
+    if (purl->host == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL,
+                    "can't allocate space for HOST");
+    }
+    (void)HDstrncpy(purl->host, curstr, (size_t)len);
+    purl->host[len] = 0;
+
+    /*************
+     * READ PORT *
+     *************/
+
+    if (':' == *tmpstr) {
+        tmpstr += 1; /* advance past ':' */
+        curstr = tmpstr;
+        while ((0 != *tmpstr) && ('/' != *tmpstr) && ('?' != *tmpstr)) {
+            tmpstr++;
+        }
+        len = tmpstr - curstr;
+        if (len == 0) {
+            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                        "PORT element cannot be empty");
+        } else if (len > urllen) {
+            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                        "problem with length of PORT substring");
+        }
+        for (i = 0; i < len; i++) {
+            if (!isdigit((unsigned char)curstr[i])) {
+                HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                            "PORT is not a decimal string");
+            }
+        }
+
+        /* copy port
+         */
+        purl->port = (char *)H5MM_malloc(sizeof(char) * (size_t)(len + 1));
+        if (purl->port == NULL) { /* cannot malloc */
+            HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL,
+                        "can't allocate space for PORT");
+        }
+        (void)HDstrncpy(purl->port, curstr, (size_t)len);
+        purl->port[len] = 0;
+    } /* if PORT element */
+
+    /*************
+     * READ PATH *
+     *************/
+
+    if ('/' == *tmpstr) {
+        /* advance past '/' */
+        tmpstr += 1;
+        curstr = tmpstr;
+
+        /* seek end of PATH
+         */
+        while ((0 != *tmpstr) && ('?' != *tmpstr)) {
+            tmpstr++;
+        }
+        len = tmpstr - curstr;
+        if (len > urllen) {
+            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                        "problem with length of PATH substring");
+        }
+        /* an empty path is allowed and leaves purl->path NULL */
+        if (len > 0) {
+            purl->path = (char *)H5MM_malloc(sizeof(char) * (size_t)(len + 1));
+            if (purl->path == NULL) {
+                HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL,
+                            "can't allocate space for PATH");
+            } /* cannot malloc path pointer */
+            (void)HDstrncpy(purl->path, curstr, (size_t)len);
+            purl->path[len] = 0;
+        }
+    } /* if PATH element */
+
+    /**************
+     * READ QUERY *
+     **************/
+
+    if ('?' == *tmpstr) {
+        tmpstr += 1;
+        curstr = tmpstr;
+        while (0 != *tmpstr) {
+            tmpstr++;
+        }
+        len = tmpstr - curstr;
+        if (len == 0) {
+            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                        "QUERY cannot be empty");
+        } else if (len > urllen) {
+            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                        "problem with length of QUERY substring");
+        }
+        purl->query = (char *)H5MM_malloc(sizeof(char) * (size_t)(len + 1));
+        if (purl->query == NULL) {
+            HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL,
+                        "can't allocate space for QUERY");
+        } /* cannot malloc query pointer */
+        (void)HDstrncpy(purl->query, curstr, (size_t)len);
+        purl->query[len] = 0;
+    } /* if QUERY exists */
+
+    /* hand the finished structure to the caller */
+    *_purl = purl;
+    ret_value = SUCCEED;
+
+done:
+    if (ret_value == FAIL) {
+        /* releases whatever was allocated so far; accepts NULL */
+        H5FD_s3comms_free_purl(purl);
+    }
+    FUNC_LEAVE_NOAPI(ret_value);
+
+} /* H5FD_s3comms_parse_url */
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: H5FD_s3comms_percent_encode_char()
+ *
+ * Purpose:
+ *
+ *     "Percent-encode" utf-8 character `c`, e.g.,
+ *         '$' -> "%24"
+ *         '¢' -> "%C2%A2"
+ *
+ *     `c` is a single byte and its numeric value is treated as the code
+ *     point to encode, so only u+0000..u+00ff can be expressed;
+ *     multi-byte input characters are not accepted.
+ *
+ *     Values up to 0x7f yield a single percent-code.
+ *     Larger values are split into six-bit groups and emitted as the
+ *     percent-codes of a UTF-8 leading byte followed by continuation bytes.
+ *
+ *     Writes output to `repr`.
+ *     `repr` cannot be null.
+ *     Assumes adequate space in `repr`...
+ *     >>> char[4] for values up to 0x7f, char[7] for 0x80..0xff,
+ *     >>> [13] as theoretical maximum (four percent-codes).
+ *
+ *     Representation `repr` is null-terminated.
+ *
+ *     Stores length of representation (without null terminator) at pointer
+ *     `repr_len`.
+ *     `repr_len` cannot be null.
+ *
+ * Return : SUCCEED/FAIL
+ *
+ *     - SUCCESS: `SUCCEED`
+ *         - percent-encoded representation written to `repr`
+ *         - 'repr' is null-terminated
+ *         - length of representation stored at `repr_len`
+ *     - FAILURE: `FAIL`
+ *         - `repr` or `repr_len` was NULL
+ *         - a percent-code could not be formatted
+ *
+ * Programmer: Jacob Smith
+ *
+ * Changes:
+ *
+ *     - Integrate into HDF.
+ *     - Rename from `hexutf8` to `H5FD_s3comms_percent_encode_char`.
+ *     --- Jacob Smith 2017-09-15
+ *
+ *----------------------------------------------------------------------------
+ */
+herr_t
+H5FD_s3comms_percent_encode_char(char *repr,
+                                 const unsigned char c,
+                                 size_t *repr_len)
+{
+    unsigned int acc           = 0;
+    unsigned int i             = 0;
+    unsigned int k             = 0;
+    unsigned int stack[4]      = {0, 0, 0, 0};
+    unsigned int stack_size    = 0;
+    int          chars_written = 0;
+    herr_t       ret_value     = SUCCEED;
+#if S3COMMS_DEBUG
+    unsigned char s[2]   = {c, 0};
+    unsigned char hex[3] = {0, 0, 0};
+#endif
+
+
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#if S3COMMS_DEBUG
+    HDfprintf(stdout, "called H5FD_s3comms_percent_encode_char.\n");
+#endif
+
+    if (repr == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "no destination `repr`.\n")
+    }
+    /* `repr_len` is written unconditionally below, so reject NULL up front */
+    if (repr_len == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "no destination `repr_len`.\n")
+    }
+
+#if S3COMMS_DEBUG
+    H5FD_s3comms_bytes_to_hex((char *)hex, s, 1, FALSE);
+    HDfprintf(stdout, " CHAR: \'%s\'\n", s);
+    HDfprintf(stdout, " CHAR-HEX: \"%s\"\n", hex);
+#endif
+
+    if (c <= (unsigned char)0x7f) {
+        /* character represented in a single "byte"
+         * and single percent-code
+         */
+#if S3COMMS_DEBUG
+        HDfprintf(stdout, " SINGLE-BYTE\n");
+#endif
+        *repr_len = 3;
+        chars_written = HDsnprintf(repr, 4, "%%%02X", c);
+        if (chars_written != 3) {
+            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                        "cannot write char %c",
+                        c);
+        }
+    } else {
+        /* multi-byte, multi-percent representation
+         */
+#if S3COMMS_DEBUG
+        HDfprintf(stdout, " MULTI-BYTE\n");
+#endif
+        stack_size = 0;
+        k = (unsigned int)c;
+        *repr_len = 0;
+        do {
+            /* push number onto stack in six-bit slices,
+             * least-significant slice first
+             */
+            stack[stack_size++] = k & 0x3F;
+            k >>= 6;
+        } while (k > 0);
+
+        /* now have "stack" of two to four six-bit numbers
+         * to be put into UTF-8 byte fields
+         * (always exactly two for an eight-bit `c`)
+         */
+
+#if S3COMMS_DEBUG
+        HDfprintf(stdout, " STACK:\n {\n");
+        for (i = 0; i < stack_size; i++) {
+            H5FD_s3comms_bytes_to_hex((char *)hex,
+                                      (unsigned char *)(&stack[i]),
+                                      1,
+                                      FALSE);
+            hex[2] = 0;
+            HDfprintf(stdout, " %s,\n", hex);
+        }
+        HDfprintf(stdout, " }\n");
+#endif
+
+        /****************
+         * leading byte *
+         ****************/
+
+        /* prepend 11[1[1]]0 to first byte */
+        /* 110xxxxx, 1110xxxx, or 11110xxx */
+        acc = 0xC0; /* 2^7 + 2^6 -> 11000000 */
+        acc += (stack_size > 2) ? 0x20 : 0;
+        acc += (stack_size > 3) ? 0x10 : 0;
+        stack_size -= 1; /* top of stack holds the most-significant slice */
+        chars_written = HDsnprintf(repr, 4, "%%%02X", acc + stack[stack_size]);
+        if (chars_written != 3) {
+            HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                        "cannot write char %c",
+                        c);
+        }
+        *repr_len += 3;
+
+        /************************
+         * continuation byte(s) *
+         ************************/
+
+        /* 10xxxxxx, remaining slices from most- to least-significant */
+        for (i = 0; i < stack_size; i++) {
+            chars_written = HDsnprintf(&repr[i*3 + 3],
+                                       4,
+                                       "%%%02X",
+                                       128 + stack[stack_size - 1 - i]);
+            if (chars_written != 3) {
+                HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                            "cannot write char %c",
+                            c);
+            }
+            *repr_len += 3;
+        }
+    }
+    *(repr + *repr_len) = '\0';
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value);
+
+} /* H5FD_s3comms_percent_encode_char */
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: H5FD_s3comms_signing_key()
+ *
+ * Purpose:
+ *
+ *     Create AWS4 "Signing Key" from secret key, AWS region, and timestamp.
+ *
+ *     Sequentially runs HMAC_SHA256 on strings in specified order,
+ *     generating re-usable checksum (according to documentation, valid for
+ *     7 days from time given):
+ *
+ *         datekey              = HMAC("AWS4" + secret, first 8 chars of date)
+ *         dateregionkey        = HMAC(datekey, region)
+ *         dateregionservicekey = HMAC(dateregionkey, "s3")
+ *         md                   = HMAC(dateregionservicekey, "aws4_request")
+ *
+ *     `secret` is the secret key for the targeted service/bucket/resource;
+ *     it is used only as HMAC key material and is never copied into an
+ *     error message.
+ *
+ *     `iso8601now` must conform to format, yyyyMMDD'T'hhmmss'Z'
+ *     e.g. "19690720T201740Z". Only the leading eight characters
+ *     ("yyyyMMDD") are consumed; shorter strings are rejected.
+ *
+ *     `region` should be one of AWS service region names, e.g. "us-east-1".
+ *
+ *     Hard-coded "service" algorithm requirement to "s3".
+ *
+ *     Inputs must be null-terminated strings.
+ *
+ *     Writes to `md` the raw byte data, length of `SHA256_DIGEST_LENGTH`.
+ *     Programmer must ensure that `md` is appropriately allocated.
+ *
+ *     When built without `H5_HAVE_ROS3_VFD` the body is compiled out:
+ *     nothing is written to `md` and `SUCCEED` is returned.
+ *
+ * Return:
+ *
+ *     - SUCCESS: `SUCCEED`
+ *         - raw byte data of signing key written to `md`
+ *     - FAILURE: `FAIL`
+ *         - if any input arguments was NULL
+ *         - if `iso8601now` is shorter than eight characters
+ *         - if working space could not be allocated
+ *         - if any HMAC computation fails
+ *
+ * Programmer: Jacob Smith
+ *             2017-07-13
+ *
+ * Changes:
+ *
+ *     - Integrate into HDF5.
+ *     - Return herr_t type.
+ *     --- Jacob Smith 2017-09-18
+ *
+ *     - NULL check and fail of input parameters.
+ *     --- Jacob Smith 2017-10-10
+ *
+ *----------------------------------------------------------------------------
+ */
+herr_t
+H5FD_s3comms_signing_key(unsigned char *md,
+                         const char *secret,
+                         const char *region,
+                         const char *iso8601now)
+{
+#ifdef H5_HAVE_ROS3_VFD
+    char          *AWS4_secret     = NULL;
+    size_t         AWS4_secret_len = 0;
+    unsigned char  datekey[SHA256_DIGEST_LENGTH];
+    unsigned char  dateregionkey[SHA256_DIGEST_LENGTH];
+    unsigned char  dateregionservicekey[SHA256_DIGEST_LENGTH];
+    int            ret             = 0; /* return value of HDsnprintf */
+    herr_t         ret_value       = SUCCEED;
+#else
+    herr_t         ret_value       = SUCCEED;
+#endif /* H5_HAVE_ROS3_VFD */
+
+
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#ifdef H5_HAVE_ROS3_VFD
+
+#if S3COMMS_DEBUG
+    HDfprintf(stdout, "called H5FD_s3comms_signing_key.\n");
+#endif
+
+    if (md == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "Destination `md` cannot be NULL.\n")
+    }
+    if (secret == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "`secret` cannot be NULL.\n")
+    }
+    if (region == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "`region` cannot be NULL.\n")
+    }
+    if (iso8601now == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "`iso8601now` cannot be NULL.\n")
+    }
+    /* the first HMAC reads exactly eight bytes ("yyyyMMDD") of the date */
+    if (HDstrlen(iso8601now) < 8) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "`iso8601now` is too short.\n")
+    }
+
+    /* room for "AWS4", the secret, and the null terminator */
+    AWS4_secret_len = 4 + HDstrlen(secret) + 1;
+    AWS4_secret = (char *)H5MM_malloc(sizeof(char) * AWS4_secret_len);
+    if (AWS4_secret == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_CANTALLOC, FAIL,
+                    "Could not allocate space.\n")
+    }
+
+    /* prepend "AWS4" to start of the secret key
+     */
+    ret = HDsnprintf(AWS4_secret, AWS4_secret_len, "%s%s", "AWS4", secret);
+    if ((size_t)ret != (AWS4_secret_len - 1)) {
+        /* deliberately does not echo the secret into the error stack */
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "problem writing AWS4+secret")
+    }
+
+    /* hash_func, key, len(key), msg, len(msg), digest_dest, digest_len_dest
+     * we know digest length, so ignore via NULL
+     * HMAC() returns NULL on failure
+     */
+    if (NULL == HMAC(EVP_sha256(),
+                     (const unsigned char *)AWS4_secret,
+                     (int)(AWS4_secret_len - 1),
+                     (const unsigned char *)iso8601now,
+                     8, /* 8 --> length of 8 --> "yyyyMMDD" */
+                     datekey,
+                     NULL))
+    {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "problem computing date key")
+    }
+    if (NULL == HMAC(EVP_sha256(),
+                     (const unsigned char *)datekey,
+                     SHA256_DIGEST_LENGTH,
+                     (const unsigned char *)region,
+                     HDstrlen(region),
+                     dateregionkey,
+                     NULL))
+    {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "problem computing date-region key")
+    }
+    if (NULL == HMAC(EVP_sha256(),
+                     (const unsigned char *)dateregionkey,
+                     SHA256_DIGEST_LENGTH,
+                     (const unsigned char *)"s3",
+                     2,
+                     dateregionservicekey,
+                     NULL))
+    {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "problem computing date-region-service key")
+    }
+    if (NULL == HMAC(EVP_sha256(),
+                     (const unsigned char *)dateregionservicekey,
+                     SHA256_DIGEST_LENGTH,
+                     (const unsigned char *)"aws4_request",
+                     12,
+                     md,
+                     NULL))
+    {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "problem computing signing key")
+    }
+
+done:
+    H5MM_xfree(AWS4_secret);
+
+#endif /* H5_HAVE_ROS3_VFD */
+
+    FUNC_LEAVE_NOAPI(ret_value);
+
+} /* H5FD_s3comms_signing_key */
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: H5FD_s3comms_tostringtosign()
+ *
+ * Purpose:
+ *
+ *     Get AWS "String to Sign" from Canonical Request, timestamp,
+ *     and AWS "region".
+ *
+ *     Common between single request and "chunked upload",
+ *     conforms to:
+ *         "AWS4-HMAC-SHA256\n" +
+ *         <ISO8601 date format> + "\n" +  // yyyyMMDD'T'hhmmss'Z'
+ *         <yyyyMMDD> + "/" + <AWS Region> + "/s3/aws4_request\n" +
+ *         hex(SHA256(<CANONICAL-REQUEST>))
+ *
+ *     Inputs `req` (canonical request string), `now` (ISO8601 format),
+ *     and `region` (s3 region designator string) must all be
+ *     null-terminated strings.
+ *
+ *     Result is written to `dest` with null-terminator.
+ *     It is left to programmer to ensure `dest` has adequate space:
+ *     17 + strlen(now) + 1 + (8 + 1 + strlen(region) + 16) + 1 +
+ *     (2 * SHA256_DIGEST_LENGTH) + 1 bytes.
+ *
+ *     The "<yyyyMMDD>/<region>/s3/aws4_request" scope line is assembled
+ *     in a 128-byte scratch buffer; a scope that does not fit is an error.
+ *
+ *     When built without `H5_HAVE_ROS3_VFD` the body is compiled out
+ *     and `FAIL` is returned.
+ *
+ * Return:
+ *
+ *     - SUCCESS: `SUCCEED`
+ *         - "string to sign" written to `dest` and null-terminated
+ *     - FAILURE: `FAIL`
+ *         - if any of the inputs are NULL
+ *         - if the scope line cannot be formatted
+ *         - if an error is encountered while computing checksum
+ *
+ * Programmer: Jacob Smith
+ *             2017-07-??
+ *
+ * Changes:
+ *
+ *     - Integrate with HDF5.
+ *     - Rename from `tostringtosign` to `H5FD_s3comms_tostringtosign`.
+ *     - Return `herr_t` instead of characters written.
+ *     - Use HDF-friendly bytes-to-hex function (`H5FD_s3comms_bytes_to_hex`)
+ *       instead of general-purpose, deprecated `hex()`.
+ *     - Adjust casts to openssl's `SHA256`.
+ *     - Input strings are now `const`.
+ *     --- Jacob Smith 2017-09-19
+ *
+ *----------------------------------------------------------------------------
+ */
+herr_t
+H5FD_s3comms_tostringtosign(char *dest,
+                            const char *req,
+                            const char *now,
+                            const char *region)
+{
+#ifdef H5_HAVE_ROS3_VFD
+    unsigned char checksum[SHA256_DIGEST_LENGTH * 2 + 1];
+    size_t        d         = 0; /* write offset into `dest` */
+    char          day[9];
+    char          hexsum[SHA256_DIGEST_LENGTH * 2 + 1];
+    size_t        i         = 0;
+    size_t        now_len   = 0;
+    int           ret       = 0; /* HDsnprintf return value */
+    herr_t        ret_value = SUCCEED;
+    char          tmp[128];
+#else
+    herr_t        ret_value = FAIL;
+#endif /* H5_HAVE_ROS3_VFD */
+
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#ifdef H5_HAVE_ROS3_VFD
+
+#if S3COMMS_DEBUG
+    HDfprintf(stdout, "called H5FD_s3comms_tostringtosign.\n");
+#endif
+
+    if (dest == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "destination buffer cannot be null.\n")
+    }
+    if (req == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "canonical request cannot be null.\n")
+    }
+    if (now == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "Timestring cannot be NULL.\n")
+    }
+    if (region == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "Region cannot be NULL.\n")
+    }
+
+
+
+    /* start from zeroed scratch buffers */
+    for (i = 0; i < 128; i++) {
+        tmp[i] = '\0';
+    }
+    for (i = 0; i < SHA256_DIGEST_LENGTH * 2 + 1; i++) {
+        checksum[i] = '\0';
+        hexsum[i] = '\0';
+    }
+
+    /* scope line: "<yyyyMMDD>/<region>/s3/aws4_request" */
+    HDstrncpy(day, now, 8);
+    day[8] = '\0';
+    ret = HDsnprintf(tmp, 127, "%s/%s/s3/aws4_request", day, region);
+    if (ret <= 0 || ret >= 127) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "problem adding day and region to string")
+    }
+
+
+
+    HDmemcpy((dest + d), "AWS4-HMAC-SHA256\n", 17);
+    d = 17;
+
+    now_len = HDstrlen(now);
+    HDmemcpy((dest + d), now, now_len);
+    d += now_len;
+    dest[d++] = '\n';
+
+    /* `ret` is the exact length of the scope line (checked above) */
+    HDmemcpy((dest + d), tmp, (size_t)ret);
+    d += (size_t)ret;
+    dest[d++] = '\n';
+
+    /* SHA256() returns NULL on failure */
+    if (NULL == SHA256((const unsigned char *)req,
+                       HDstrlen(req),
+                       checksum))
+    {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "could not compute checksum of canonical request");
+    }
+
+    if (FAIL ==
+        H5FD_s3comms_bytes_to_hex(hexsum,
+                                  (const unsigned char *)checksum,
+                                  SHA256_DIGEST_LENGTH,
+                                  TRUE))
+    {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "could not create hex string");
+    }
+
+    for (i = 0; i < SHA256_DIGEST_LENGTH * 2; i++) {
+        dest[d++] = hexsum[i];
+    }
+
+    dest[d] = '\0';
+
+#endif /* H5_HAVE_ROS3_VFD */
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_s3comms_tostringtosign */
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: H5FD_s3comms_trim()
+ *
+ * Purpose:
+ *
+ *     Remove all whitespace characters from start and end of a string `s`
+ *     of length `s_len`, writing trimmed string copy to `dest`.
+ *     Stores number of characters remaining at `n_written`.
+ *
+ *     Whitespace is whatever `isspace()` reports for the byte.
+ *
+ *     Destination for trimmed copy `dest` cannot be null.
+ *     `dest` must have adequate space allocated for trimmed copy.
+ *     If inadequate space, behavior is undefined, possibly resulting
+ *     in segfault or overwrite of other data.
+ *     No null terminator is appended to `dest`.
+ *
+ *     `n_written` cannot be null.
+ *
+ *     If `s` is NULL or all whitespace, `dest` is untouched and `n_written`
+ *     is set to 0.
+ *
+ * Return:
+ *
+ *     - SUCCESS: `SUCCEED`
+ *     - FAILURE: `FAIL`
+ *         - `dest == NULL`
+ *         - `n_written == NULL`
+ *
+ * Programmer: Jacob Smith
+ *             2017-09-18
+ *
+ * Changes:
+ *
+ *     - Rename from `trim()` to `H5FD_s3comms_trim()`.
+ *     - Incorporate into HDF5.
+ *     - Returns `herr_t` type.
+ *     --- Jacob Smith 2017-??-??
+ *
+ *----------------------------------------------------------------------------
+ */
+herr_t
+H5FD_s3comms_trim(char *dest,
+                  char *s,
+                  size_t s_len,
+                  size_t *n_written)
+{
+    herr_t ret_value = SUCCEED;
+
+
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#if S3COMMS_DEBUG
+    HDfprintf(stdout, "called H5FD_s3comms_trim.\n");
+#endif
+
+    if (dest == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "destination cannot be null.")
+    }
+    /* `n_written` is always stored to on success, so it must be valid */
+    if (n_written == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "`n_written` cannot be null.")
+    }
+    if (s == NULL) {
+        s_len = 0;
+    }
+
+
+
+    /* Skip leading whitespace;
+     * reduce total length per character.
+     */
+    while ((s_len > 0) && isspace((unsigned char)s[0])) {
+        s++;
+        s_len--;
+    }
+
+    /* Drop trailing whitespace.
+     * If length is 0 already, there is no non-whitespace character.
+     * Otherwise s[0] is non-whitespace, which bounds the backward scan.
+     */
+    if (s_len > 0) {
+        while (isspace((unsigned char)s[s_len - 1])) {
+            s_len--;
+        }
+
+        /* write output into dest
+         */
+        HDmemcpy(dest, s, s_len);
+    }
+
+    *n_written = s_len;
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_s3comms_trim */
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Function: H5FD_s3comms_uriencode()
+ *
+ * Purpose:
+ *
+ *     URIencode (percent-encode) every byte except "[a-zA-Z0-9]-._~".
+ *
+ *     For each character in source string `s` from `s[0]` to `s[s_len-1]`,
+ *     writes to `dest` either the raw character or its percent-encoded
+ *     equivalent.
+ *
+ *     See `H5FD_s3comms_percent_encode_char` for information on
+ *     percent-encoding.
+ *
+ *     Space (' ') character encoded as "%20" (not "+")
+ *
+ *     Forward-slash ('/') encoded as "%2F" only when `encode_slash == true`.
+ *
+ *     Records number of characters written at `n_written`.
+ *
+ *     Assumes that `dest` has been allocated with enough space.
+ *     No null terminator is appended to `dest`.
+ *
+ *     None of `dest`, `s`, or `n_written` can be NULL.
+ *
+ *     `s_len == 0` will have no effect (`n_written` is set to 0).
+ *
+ * Return:
+ *
+ *     - SUCCESS: `SUCCEED`
+ *     - FAILURE: `FAIL`
+ *         - source strings `s` or destination `dest` are NULL
+ *         - `n_written` is NULL
+ *         - error while attempting to percent-encode a character
+ *
+ * Programmer: Jacob Smith
+ *             2017-07-??
+ *
+ * Changes:
+ *
+ *     - Integrate to HDF environment.
+ *     - Rename from `uriencode` to `H5FD_s3comms_uriencode`.
+ *     - Change return from characters written to herr_t;
+ *       move to i/o parameter `n_written`.
+ *     - No longer append null-terminator to string;
+ *       programmer may append or not as appropriate upon return.
+ *     --- Jacob Smith 2017-09-15
+ *
+ *----------------------------------------------------------------------------
+ */
+herr_t
+H5FD_s3comms_uriencode(char *dest,
+                       const char *s,
+                       size_t s_len,
+                       hbool_t encode_slash,
+                       size_t *n_written)
+{
+    char   c         = 0;
+    size_t dest_off  = 0;
+    char   hex_buffer[13];
+    size_t hex_len   = 0;
+    herr_t ret_value = SUCCEED;
+    size_t s_off     = 0;
+
+
+
+    FUNC_ENTER_NOAPI_NOINIT
+
+#if S3COMMS_DEBUG
+    HDfprintf(stdout, "H5FD_s3comms_uriencode called.\n");
+#endif
+
+    if (s == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "source string cannot be NULL");
+    }
+    if (dest == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "destination cannot be NULL");
+    }
+    if (n_written == NULL) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "`n_written` cannot be NULL");
+    }
+
+    /* Write characters to destination, converting to percent-encoded
+     * "hex-utf-8" strings if necessary.
+     * e.g., '$' -> "%24"
+     */
+    for (s_off = 0; s_off < s_len; s_off++) {
+        c = s[s_off];
+        /* <ctype.h> classifiers require an unsigned char value (or EOF);
+         * a negative plain `char` would be undefined behavior.
+         */
+        if (isalnum((unsigned char)c) ||
+            c == '.' ||
+            c == '-' ||
+            c == '_' ||
+            c == '~' ||
+            (c == '/' && encode_slash == FALSE))
+        {
+            dest[dest_off++] = c;
+        } else {
+            if (FAIL ==
+                H5FD_s3comms_percent_encode_char(hex_buffer,
+                                                 (const unsigned char)c,
+                                                 &hex_len))
+            {
+                /* reuse the scratch buffer to print `c` as a string */
+                hex_buffer[0] = c;
+                hex_buffer[1] = 0;
+                HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                            "unable to percent-encode character \'%s\' "
+                            "at %d in \"%s\"", hex_buffer, (int)s_off, s);
+            }
+
+            HDmemcpy(dest + dest_off, hex_buffer, hex_len);
+            dest_off += hex_len;
+        }
+    }
+
+    /* every input byte yields at least one output byte, so a smaller
+     * output count can only mean the offset wrapped around
+     */
+    if (dest_off < s_len) {
+        HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL,
+                    "buffer overflow");
+    }
+
+    *n_written = dest_off;
+
+done:
+    FUNC_LEAVE_NOAPI(ret_value)
+
+} /* H5FD_s3comms_uriencode */
+
+
diff --git a/src/H5FDs3comms.h b/src/H5FDs3comms.h
new file mode 100644
index 0000000..0524c46
--- /dev/null
+++ b/src/H5FDs3comms.h
@@ -0,0 +1,634 @@
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
+ * Read-Only S3 Virtual File Driver (VFD) *
+ * Copyright (c) 2017-2018, The HDF Group. *
+ * *
+ * All rights reserved. *
+ * *
+ * NOTICE: *
+ * All information contained herein is, and remains, the property of The HDF *
+ * Group. The intellectual and technical concepts contained herein are *
+ * proprietary to The HDF Group. Dissemination of this information or *
+ * reproduction of this material is strictly forbidden unless prior written *
+ * permission is obtained from The HDF Group. *
+ * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+/*****************************************************************************
+ *
+ * This is the header for the S3 Communications module
+ *
+ * ***NOT A FILE DRIVER***
+ *
+ * Purpose:
+ *
+ * - Provide structures and functions related to communicating with
+ * Amazon S3 (Simple Storage Service).
+ * - Abstract away the REST API (HTTP,
+ * networked communications) behind a series of uniform function calls.
+ * - Handle AWS4 authentication, if appropriate.
+ * - Fail predictably in event of errors.
+ * - Eventually, support more S3 operations, such as creating, writing to,
+ * and removing Objects remotely.
+ *
+ * translates:
+ * `read(some_file, bytes_offset, bytes_length, &dest_buffer);`
+ * to:
+ * ```
+ * GET myfile HTTP/1.1
+ * Host: somewhere.me
+ * Range: bytes=4096-5115
+ * ```
+ * and places received bytes from HTTP response...
+ * ```
+ * HTTP/1.1 206 Partial-Content
+ * Content-Range: 4096-5115/63239
+ *
+ * <bytes>
+ * ```
+ * ...in destination buffer.
+ *
+ * TODO: put documentation in a consistent place and point to it from here.
+ *
+ * Programmer: Jacob Smith
+ * 2017-11-30
+ *
+ *****************************************************************************/
+
+#include <ctype.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#ifdef H5_HAVE_ROS3_VFD
+#include <curl/curl.h>
+#include <openssl/evp.h>
+#include <openssl/hmac.h>
+#include <openssl/sha.h>
+#endif /* ifdef H5_HAVE_ROS3_VFD */
+
+/*****************
+ * PUBLIC MACROS *
+ *****************/
+
+/* hexadecimal string of pre-computed sha256 checksum of the empty string
+ * hex(sha256sum(""))
+ */
+#define EMPTY_SHA256 \
+"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
+
+/* string length (plus null terminator)
+ * example ISO8601-format string: "20170713T145903Z" (YYYYmmdd'T'HHMMSS'Z')
+ * 16 characters + 1 for the terminator
+ */
+#define ISO8601_SIZE 17
+
+/* string length (plus null terminator)
+ * example RFC7231-format string: "Fri, 30 Jun 2017 20:41:55 GMT"
+ * 29 characters + 1 for the terminator
+ */
+#define RFC7231_SIZE 30
+
+/*---------------------------------------------------------------------------
+ *
+ * Macro: ISO8601NOW()
+ *
+ * Purpose:
+ *
+ * write "YYYYmmdd'T'HHMMSS'Z'" (less single-quotes) to dest
+ * e.g., "20170630T204155Z"
+ *
+ * wrapper for strftime()
+ *
+ * `dest` is the output buffer; at most ISO8601_SIZE bytes are written.
+ * `now_gm` is handed to strftime() as its broken-down time argument.
+ *
+ * It is left to the programmer to check return value of
+ * ISO8601NOW (should equal ISO8601_SIZE - 1).
+ *
+ * Programmer: Jacob Smith
+ * 2017-07-??
+ *
+ *---------------------------------------------------------------------------
+ */
+#define ISO8601NOW(dest, now_gm) \
+strftime((dest), ISO8601_SIZE, "%Y%m%dT%H%M%SZ", (now_gm))
+
+/*---------------------------------------------------------------------------
+ *
+ * Macro: RFC7231NOW()
+ *
+ * Purpose:
+ *
+ * write "Day, dd Mmm YYYY HH:MM:SS GMT" to dest
+ * e.g., "Fri, 30 Jun 2017 20:41:55 GMT"
+ *
+ * wrapper for strftime()
+ *
+ * `dest` is the output buffer; at most RFC7231_SIZE bytes are written.
+ * `now_gm` is handed to strftime() as its broken-down time argument.
+ * The literal "GMT" is part of the format string, not derived from
+ * `now_gm`.
+ *
+ * It is left to the programmer to check return value of
+ * RFC7231NOW (should equal RFC7231_SIZE - 1).
+ *
+ * Programmer: Jacob Smith
+ * 2017-07-??
+ *
+ *---------------------------------------------------------------------------
+ */
+#define RFC7231NOW(dest, now_gm) \
+strftime((dest), RFC7231_SIZE, "%a, %d %b %Y %H:%M:%S GMT", (now_gm))
+
+
+/* Reasonable maximum length of a credential string.
+ * Provided for error-checking S3COMMS_FORMAT_CREDENTIAL (below).
+ * 17 <- "////aws4_request\0"
+ * 2 <- "s3" (service)
+ * 8 <- "YYYYmmdd" (date)
+ * 128 <- (access_id)
+ * 155 :: sum
+ * NOTE(review): the tally has no entry for the region string, so the
+ * region only fits when the access id is shorter than 128 characters.
+ */
+#define S3COMMS_MAX_CREDENTIAL_SIZE 155
+
+
+/*---------------------------------------------------------------------------
+ *
+ * Macro: S3COMMS_FORMAT_CREDENTIAL()
+ *
+ * Purpose:
+ *
+ * Format "S3 Credential" string from inputs, for AWS4.
+ *
+ * Wrapper for HDsnprintf().
+ *
+ * _HAS NO ERROR-CHECKING FACILITIES_
+ * It is left to programmer to ensure that return value confers success.
+ * The output is limited to S3COMMS_MAX_CREDENTIAL_SIZE bytes including
+ * the null terminator, so a return value greater than or equal to
+ * S3COMMS_MAX_CREDENTIAL_SIZE means the credential was truncated.
+ * e.g.,
+ * ```
+ * assert( S3COMMS_MAX_CREDENTIAL_SIZE >
+ * S3COMMS_FORMAT_CREDENTIAL(...) );
+ * ```
+ *
+ * "<access-id>/<date>/<aws-region>/<aws-service>/aws4_request"
+ * assuming that `dest` has adequate space.
+ *
+ * ALL inputs must be null-terminated strings.
+ *
+ * `access` should be the user's access key ID.
+ * `iso8601_date` must be of format "YYYYmmdd".
+ * `region` should be relevant AWS region, i.e. "us-east-1".
+ * `service` should be "s3".
+ *
+ * Programmer: Jacob Smith
+ * 2017-09-19
+ *
+ * Changes: None.
+ *
+ *---------------------------------------------------------------------------
+ */
+#define S3COMMS_FORMAT_CREDENTIAL(dest, access, iso8601_date, region, service) \
+HDsnprintf((dest), S3COMMS_MAX_CREDENTIAL_SIZE, \
+ "%s/%s/%s/%s/aws4_request", \
+ (access), (iso8601_date), (region), (service))
+
+/*********************
+ * PUBLIC STRUCTURES *
+ *********************/
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Structure: hrb_node_t
+ *
+ * HTTP Header Field Node
+ *
+ *
+ *
+ * Maintain an ordered (linked) list of HTTP Header fields.
+ *
+ * Provides efficient access and manipulation of a logical sequence of
+ * HTTP header fields, of particular use when composing an
+ * "S3 Canonical Request" for authentication.
+ *
+ * - The creation of a Canonical Request involves:
+ * - convert field names to lower case
+ * - sort by this lower-case name
+ * - convert ": " name-value separator in HTTP string to ":"
+ * - get sorted lowercase names without field or separator
+ *
+ * As HTTP headers allow headers in any order (excepting the case of multiple
+ * headers with the same name), the list ordering can be optimized for Canonical
+ * Request creation, suggesting alphabetical order. For more expedient insertion
+ * and removal of elements in the list, linked list seems preferable to a
+ * dynamically-expanding array. The usually-smaller number of entries (5 or
+ * fewer) makes performance overhead of traversing the list trivial.
+ *
+ * The above requirements of creating a Canonical Request suggest a reasonable
+ * trade-off of speed for space with the option to compute elements as needed
+ * or to have the various elements prepared and stored in the structure
+ * (e.g. name, value, lowername, concatenated name:value)
+ * The structure currently is implemented to pre-compute.
+ *
+ * At all times, the "first" node of the list should be the least,
+ * alphabetically. For all nodes, the `next` node should be either NULL or
+ * of greater alphabetical value.
+ *
+ * Each node contains its own header field information, plus a pointer to the
+ * next node.
+ *
+ * It is not allowed to have multiple nodes with the same _lowercase_ `name`s
+ * in the same list
+ * (i.e., name is case-insensitive for access and modification.)
+ *
+ * All data (`name`, `value`, `lowername`, and `cat`) are null-terminated
+ * strings allocated specifically for their node.
+ *
+ *
+ *
+ * `magic` (unsigned long)
+ *
+ * "unique" identifier number for the structure type
+ *
+ * `name` (char *)
+ *
+ * Case-meaningful name of the HTTP field.
+ * Given case is how it is supplied to networking code.
+ * e.g., "Range"
+ *
+ * `lowername` (char *)
+ *
+ * Lowercase copy of name.
+ * e.g., "range"
+ *
+ * `value` (char *)
+ *
+ * Case-meaningful value of HTTP field.
+ * e.g., "bytes=0-9"
+ *
+ * `cat` (char *)
+ *
+ * Concatenated, null-terminated string of HTTP header line,
+ * as the field would appear in an HTTP request.
+ * e.g., "Range: bytes=0-9"
+ *
+ * `next` (hrb_node_t *)
+ *
+ * Pointer to next node in the list, or NULL sentinel as end of list.
+ * Next node must have a greater `lowername` as determined by strcmp().
+ *
+ *
+ *
+ * Programmer: Jacob Smith
+ * 2017-09-22
+ *
+ * Changes:
+ *
+ * - Change from twin doubly-linked lists to singly-linked list.
+ * --- Jake Smith 2017-01-17
+ *
+ *----------------------------------------------------------------------------
+ */
+typedef struct hrb_node_t {
+ unsigned long magic; /* structure-type identifier; presumably S3COMMS_HRB_NODE_MAGIC for a valid node */
+ char *name; /* field name in the case supplied, e.g. "Range" */
+ char *value; /* field value, e.g. "bytes=0-9" */
+ char *cat; /* whole header line, e.g. "Range: bytes=0-9" */
+ char *lowername; /* lowercase copy of `name`; the list is ordered by this */
+ struct hrb_node_t *next; /* next node (greater `lowername`), or NULL at end of list */
+} hrb_node_t;
+#define S3COMMS_HRB_NODE_MAGIC 0x7F5757UL
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Structure: hrb_t
+ *
+ * HTTP Request Buffer structure
+ *
+ *
+ *
+ * Logically represent an HTTP request
+ *
+ * GET /myplace/myfile.h5 HTTP/1.1
+ * Host: over.rainbow.oz
+ * Date: Fri, 01 Dec 2017 12:35:04 CST
+ *
+ * <body>
+ *
+ * ...with fast, efficient access to and modification of primary and field
+ * elements.
+ *
+ * Structure for building HTTP requests while hiding much of the string
+ * processing required "under the hood."
+ *
+ * Information about the request target -- the first line -- and the body text,
+ * if any, are managed directly with this structure. All header fields, e.g.,
+ * "Host" and "Date" above, are created with a linked list of `hrb_node_t` and
+ * included in the request by a pointer to the head of the list.
+ *
+ *
+ *
+ * `magic` (unsigned long)
+ *
+ * "Magic" number confirming that this is an hrb_t structure and
+ * what operations are valid for it.
+ *
+ * Must be S3COMMS_HRB_MAGIC to be valid.
+ *
+ * `body` (char *) :
+ *
+ * Pointer to start of HTTP body.
+ *
+ * Can be NULL, in which case it is treated as the empty string, "".
+ *
+ * `body_len` (size_t) :
+ *
+ * Number of bytes (characters) in `body`. 0 if empty or NULL `body`.
+ *
+ * `first_header` (hrb_node_t *) :
+ *
+ * Pointer to first SORTED header node, if any.
+ * It is left to the programmer to ensure that this node and associated
+ * list is destroyed when done.
+ *
+ * `resource` (char *) :
+ *
+ * Pointer to resource URL string, e.g., "/folder/page.xhtml".
+ *
+ * `verb` (char *) :
+ *
+ * Pointer to HTTP verb string, e.g., "GET".
+ *
+ * `version` (char *) :
+ *
+ * Pointer to HTTP version string, e.g., "HTTP/1.1".
+ *
+ *
+ *
+ * Programmer: Jacob Smith
+ *
+ *----------------------------------------------------------------------------
+ */
+typedef struct {
+ unsigned long magic; /* must be S3COMMS_HRB_MAGIC to be valid */
+ char *body; /* start of HTTP body; NULL is treated as "" */
+ size_t body_len; /* bytes in `body`; 0 if `body` is empty or NULL */
+ hrb_node_t *first_header; /* head of the sorted header list, if any */
+ char *resource; /* resource URL string, e.g. "/folder/page.xhtml" */
+ char *verb; /* HTTP verb string, e.g. "GET" */
+ char *version; /* HTTP version string, e.g. "HTTP/1.1" */
+} hrb_t;
+#define S3COMMS_HRB_MAGIC 0x6DCC84UL
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Structure: parsed_url_t
+ *
+ *
+ * Represent a URL with easily-accessed pointers to logical elements within.
+ * These elements (components) are stored as null-terminated strings (or just
+ * NULLs). These components should be allocated for the structure, making the
+ * data as safe as possible from modification. If a component is NULL, it is
+ * either implicit in or absent from the URL.
+ *
+ * "http://mybucket.s3.amazonaws.com:8080/somefile.h5?param=value&arg=value"
+ * ^--^ ^-----------------------^ ^--^ ^---------^ ^-------------------^
+ * Scheme Host Port Resource Query/-ies
+ *
+ *
+ *
+ * `magic` (unsigned long)
+ *
+ * Structure identification and validation identifier.
+ * Identifies as `parsed_url_t` type.
+ *
+ * `scheme` (char *)
+ *
+ * String representing which protocol is to be expected.
+ * _Must_ be present.
+ * "http", "https", "ftp", e.g.
+ *
+ * `host` (char *)
+ *
+ * String of host, either domain name, IPv4, or IPv6 format.
+ * _Must_ be present.
+ * "over.rainbow.oz", "192.168.0.1", "[0000:0000:0000:0001]"
+ *
+ * `port` (char *)
+ *
+ * String representation of specified port. Must resolve to a valid unsigned
+ * integer.
+ * "9000", "80"
+ *
+ * `path` (char *)
+ *
+ * Path to resource on host. If not specified, assumes root "/".
+ * "lollipop_guild.wav", "characters/witches/white.dat"
+ *
+ * `query` (char *)
+ *
+ * Single string of all query parameters in url (if any).
+ * "arg1=value1&arg2=value2"
+ *
+ *
+ *
+ * Programmer: Jacob Smith
+ *
+ *----------------------------------------------------------------------------
+ */
+typedef struct {
+ unsigned long magic; /* structure-type identifier; presumably S3COMMS_PARSED_URL_MAGIC for a valid object */
+ char *scheme; /* required */
+ char *host; /* required */
+ char *port; /* decimal digits as a string, or NULL if absent from the URL */
+ char *path; /* path without the leading '/', or NULL if absent */
+ char *query; /* everything after '?', or NULL if absent */
+} parsed_url_t;
+#define S3COMMS_PARSED_URL_MAGIC 0x21D0DFUL
+
+
+/*----------------------------------------------------------------------------
+ *
+ * Structure: s3r_t
+ *
+ *
+ *
+ * S3 request structure "handle".
+ *
+ * Holds persistent information for Amazon S3 requests.
+ *
+ * Instantiated through `H5FD_s3comms_s3r_open()`, copies data into self.
+ *
+ * Intended to be re-used for operations on a remote object.
+ *
+ * Cleaned up through `H5FD_s3comms_s3r_close()`.
+ *
+ * _DO NOT_ share handle between threads: curl easy handle `curlhandle` has
+ * undefined behavior if called to perform in multiple threads.
+ *
+ *
+ *
+ * `magic` (unsigned long)
+ *
+ * "magic" number identifying this structure as unique type.
+ * MUST equal `S3COMMS_S3R_MAGIC` to be valid.
+ *
+ * `curlhandle` (CURL *)
+ *
+ * Pointer to the curl_easy handle generated for the request.
+ *
+ * `httpverb` (char *)
+ *
+ * Pointer to null-terminated string. HTTP verb,
+ * e.g. "GET", "HEAD", "PUT", etc.
+ *
+ * Default is NULL, resulting in a "GET" request.
+ *
+ * `purl` (parsed_url_t *)
+ *
+ * Pointer to structure holding the elements of URL for file open.
+ *
+ * e.g., "http://bucket.aws.com:8080/myfile.dat?q1=v1&q2=v2"
+ * parsed into...
+ * { scheme: "http"
+ * host: "bucket.aws.com"
+ * port: "8080"
+ * path: "myfile.dat"
+ * query: "q1=v1&q2=v2"
+ * }
+ *
+ * Cannot be NULL.
+ *
+ * `region` (char *)
+ *
+ * Pointer to null-terminated string, specifying S3 "region",
+ * e.g., "us-east-1".
+ *
+ * Required to authenticate.
+ *
+ * `secret_id` (char *)
+ *
+ * Pointer to null-terminated string for "secret" access id to S3 resource.
+ *
+ * Required to authenticate.
+ *
+ * `signing_key` (unsigned char *)
+ *
+ * Pointer to `SHA256_DIGEST_LENGTH`-long string for "re-usable" signing
+ * key, generated via
+ * `HMAC-SHA256(HMAC-SHA256(HMAC-SHA256(HMAC-SHA256("AWS4<secret_key>",
+ * "<yyyyMMDD>"), "<aws-region>"), "<aws-service>"), "aws4_request")`
+ * which may be re-used for several (up to seven (7)) days from creation.
+ * Computed once upon file open.
+ *
+ * Required to authenticate.
+ *
+ *
+ *
+ * Programmer: Jacob Smith
+ *
+ *----------------------------------------------------------------------------
+ */
+typedef struct {
+ unsigned long magic; /* identifies this structure type; must match the s3r magic value to be valid */
+#ifdef H5_HAVE_ROS3_VFD
+ CURL *curlhandle; /* curl easy handle for the request; do not share between threads */
+ size_t filesize; /* NOTE(review): undocumented in the header comment -- presumably size of the remote object in bytes; confirm */
+ char *httpverb; /* HTTP verb string, e.g. "GET", "HEAD"; NULL results in a "GET" request */
+ parsed_url_t *purl; /* parsed elements of the URL used for file open; cannot be NULL */
+ char *region; /* S3 region string, e.g. "us-east-1"; required to authenticate */
+ char *secret_id; /* "secret" access id for the S3 resource; required to authenticate */
+ unsigned char *signing_key; /* SHA256_DIGEST_LENGTH-long re-usable signing key; required to authenticate */
+#endif /* ifdef H5_HAVE_ROS3_VFD */
+} s3r_t;
+#define S3COMMS_S3R_MAGIC 0x44d8d79 /* NOTE(review): no UL suffix although `magic` is unsigned long and S3COMMS_PARSED_URL_MAGIC uses UL */
+
+/*******************************************
+ * DECLARATION OF HTTP FIELD LIST ROUTINES *
+ *******************************************/
+
+herr_t H5FD_s3comms_hrb_node_set(hrb_node_t **L,
+ const char *name,
+ const char *value); /* takes the list by pointer-to-pointer, so *L may presumably be replaced -- confirm in H5FDs3comms.c */
+
+/***********************************************
+ * DECLARATION OF HTTP REQUEST BUFFER ROUTINES *
+ ***********************************************/
+
+herr_t H5FD_s3comms_hrb_destroy(hrb_t **buf); /* presumably releases *buf; whether *buf is reset is not visible here -- confirm */
+
+hrb_t * H5FD_s3comms_hrb_init_request(const char *verb,
+ const char *resource,
+ const char *host); /* returns an hrb_t pointer; presumably paired with H5FD_s3comms_hrb_destroy() -- confirm */
+
+/*************************************
+ * DECLARATION OF S3REQUEST ROUTINES *
+ *************************************/
+
+H5_DLL herr_t H5FD_s3comms_s3r_close(s3r_t *handle); /* cleans up a handle obtained from H5FD_s3comms_s3r_open() */
+
+H5_DLL size_t H5FD_s3comms_s3r_get_filesize(s3r_t *handle); /* presumably reports handle->filesize -- confirm */
+
+H5_DLL s3r_t * H5FD_s3comms_s3r_open(const char url[],
+ const char region[],
+ const char id[],
+ const unsigned char signing_key[]); /* instantiates an s3r_t handle, copying data into it; release with H5FD_s3comms_s3r_close() */
+
+H5_DLL herr_t H5FD_s3comms_s3r_read(s3r_t *handle,
+ haddr_t offset,
+ size_t len,
+ void *dest); /* presumably reads `len` bytes starting at `offset` into `dest` -- confirm, incl. meaning of len == 0 */
+
+/*********************************
+ * DECLARATION OF OTHER ROUTINES *
+ *********************************/
+
+H5_DLL struct tm * gmnow(void); /* NOTE(review): only routine here without the H5FD_s3comms_ prefix; presumably current time as GMT -- confirm */
+
+herr_t H5FD_s3comms_aws_canonical_request(char *canonical_request_dest,
+ char *signed_headers_dest,
+ hrb_t *http_request); /* two output buffers, no size parameters: caller must know the required capacity -- confirm */
+
+H5_DLL herr_t H5FD_s3comms_bytes_to_hex(char *dest,
+ const unsigned char *msg,
+ size_t msg_len,
+ hbool_t lowercase); /* no size parameter for `dest`; presumably needs two chars per input byte -- confirm */
+
+herr_t H5FD_s3comms_free_purl(parsed_url_t *purl); /* presumably releases a parsed_url_t from H5FD_s3comms_parse_url() -- confirm */
+
+herr_t H5FD_s3comms_HMAC_SHA256(const unsigned char *key,
+ size_t key_len,
+ const char *msg,
+ size_t msg_len,
+ char *dest); /* `dest` is char *, not unsigned char * -- output format (raw vs. hex) not visible here; confirm */
+
+herr_t H5FD_s3comms_load_aws_profile(const char *name,
+ char *key_id_out,
+ char *secret_access_key_out,
+ char *aws_region_out); /* three output buffers, no size parameters -- confirm required capacities */
+
+herr_t H5FD_s3comms_nlowercase(char *dest,
+ const char *s,
+ size_t len); /* presumably writes a lowercased copy of `len` chars of `s` to `dest` -- confirm termination behavior */
+
+herr_t H5FD_s3comms_parse_url(const char *str,
+ parsed_url_t **purl); /* result is returned through `purl`; presumably released with H5FD_s3comms_free_purl() -- confirm */
+
+herr_t H5FD_s3comms_percent_encode_char(char *repr,
+ const unsigned char c,
+ size_t *repr_len); /* presumably writes the %XX form of `c` to `repr` and its length to *repr_len -- confirm */
+
+H5_DLL herr_t H5FD_s3comms_signing_key(unsigned char *md,
+ const char *secret,
+ const char *region,
+ const char *iso8601now); /* presumably produces the re-usable signing key stored in s3r_t.signing_key -- confirm */
+
+herr_t H5FD_s3comms_tostringtosign(char *dest,
+ const char *req_str,
+ const char *now,
+ const char *region); /* no size parameter for `dest` -- confirm required capacity */
+
+H5_DLL herr_t H5FD_s3comms_trim(char *dest,
+ char *s,
+ size_t s_len,
+ size_t *n_written); /* NOTE(review): `s` is not const-qualified, unlike the input of H5FD_s3comms_uriencode() */
+
+H5_DLL herr_t H5FD_s3comms_uriencode(char *dest,
+ const char *s,
+ size_t s_len,
+ hbool_t encode_slash,
+ size_t *n_written); /* presumably reports the number of chars written to `dest` via *n_written -- confirm */
+
+
diff --git a/src/Makefile.am b/src/Makefile.am
index 0eaae1a..f737d5d 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -63,8 +63,8 @@ libhdf5_la_SOURCES= H5.c H5checksum.c H5dbg.c H5system.c H5timer.c H5trace.c \
H5FA.c H5FAcache.c H5FAdbg.c H5FAdblock.c H5FAdblkpage.c H5FAhdr.c \
H5FAint.c H5FAstat.c H5FAtest.c \
H5FD.c H5FDcore.c \
- H5FDfamily.c H5FDint.c H5FDlog.c \
- H5FDmulti.c H5FDsec2.c H5FDspace.c H5FDstdio.c H5FDtest.c \
+ H5FDfamily.c H5FDhdfs.c H5FDint.c H5FDlog.c H5FDs3comms.c \
+ H5FDmulti.c H5FDros3.c H5FDsec2.c H5FDspace.c H5FDstdio.c H5FDtest.c \
H5FL.c H5FO.c H5FS.c H5FScache.c H5FSdbg.c H5FSint.c H5FSsection.c \
H5FSstat.c H5FStest.c \
H5G.c H5Gbtree2.c H5Gcache.c \
@@ -138,8 +138,8 @@ include_HEADERS = hdf5.h H5api_adpt.h H5overflow.h H5pubconf.h H5public.h H5vers
H5Cpublic.h H5Dpublic.h \
H5Epubgen.h H5Epublic.h H5ESpublic.h H5Fpublic.h \
H5FDpublic.h H5FDcore.h H5FDdirect.h \
- H5FDfamily.h H5FDlog.h H5FDmpi.h H5FDmpio.h \
- H5FDmulti.h H5FDsec2.h H5FDstdio.h H5FDwindows.h \
+ H5FDfamily.h H5FDhdfs.h H5FDlog.h H5FDmpi.h H5FDmpio.h \
+ H5FDmulti.h H5FDros3.h H5FDsec2.h H5FDstdio.h H5FDwindows.h \
H5Gpublic.h H5Ipublic.h H5Lpublic.h \
H5MMpublic.h H5Opublic.h H5Ppublic.h \
H5PLextern.h H5PLpublic.h \
diff --git a/src/hdf5.h b/src/hdf5.h
index c12037f..2201e9e 100644
--- a/src/hdf5.h
+++ b/src/hdf5.h
@@ -40,16 +40,18 @@
#include "H5Zpublic.h" /* Data filters */
/* Predefined file drivers */
-#include "H5FDcore.h" /* Files stored entirely in memory */
-#include "H5FDdirect.h" /* Linux direct I/O */
-#include "H5FDfamily.h" /* File families */
+#include "H5FDcore.h" /* Files stored entirely in memory */
+#include "H5FDdirect.h" /* Linux direct I/O */
+#include "H5FDfamily.h" /* File families */
+#include "H5FDhdfs.h" /* Hadoop HDFS */
#include "H5FDlog.h" /* sec2 driver with I/O logging (for debugging) */
-#include "H5FDmpi.h" /* MPI-based file drivers */
-#include "H5FDmulti.h" /* Usage-partitioned file family */
-#include "H5FDsec2.h" /* POSIX unbuffered file I/O */
-#include "H5FDstdio.h" /* Standard C buffered I/O */
+#include "H5FDmpi.h" /* MPI-based file drivers */
+#include "H5FDmulti.h" /* Usage-partitioned file family */
+#include "H5FDros3.h" /* R/O S3 "file" I/O */
+#include "H5FDsec2.h" /* POSIX unbuffered file I/O */
+#include "H5FDstdio.h" /* Standard C buffered I/O */
#ifdef H5_HAVE_WINDOWS
-#include "H5FDwindows.h" /* Win32 I/O */
+#include "H5FDwindows.h" /* Win32 I/O */
#endif
/* Virtual object layer (VOL) connectors */
diff --git a/src/libhdf5.settings.in b/src/libhdf5.settings.in
index f856ebc..baa99ea 100644
--- a/src/libhdf5.settings.in
+++ b/src/libhdf5.settings.in
@@ -79,6 +79,8 @@ Parallel Filtered Dataset Writes: @PARALLEL_FILTERED_WRITES@
I/O filters (external): @EXTERNAL_FILTERS@
MPE: @MPE@
Direct VFD: @DIRECT_VFD@
+ (Read-Only) S3 VFD: @ROS3_VFD@
+ (Read-Only) HDFS VFD: @HAVE_LIBHDFS@
dmalloc: @HAVE_DMALLOC@
Packages w/ extra debug output: @INTERNAL_DEBUG_OUTPUT@
API tracing: @TRACE_API@