#!/usr/bin/perl -w
##
# Copyright by The HDF Group.
# Copyright by the Board of Trustees of the University of Illinois.
# All rights reserved.
#
# This file is part of HDF5.  The full HDF5 copyright notice, including
# terms governing use, modification, and redistribution, is contained in
# the COPYING file, which can be found at the root of the source code
# distribution tree, or in https://support.hdfgroup.org/ftp/HDF5/releases.
# If you do not have access to either file, you may request a copy from
# help@hdfgroup.org.
##
require 5.003;
$Source = "";

##############################################################################
# A map from type name to type letter.  We use this map for two reasons:
#  1. We want the debugging stuff in the source code to be as unobtrusive as
#     possible, which means as compact as possible.
#  2. It's easier (faster) to parse these one and two-letter types in the C
#     functions that display debugging results.
#
# All type strings are one or two characters.  One-character strings
# are always lower case and should be used for common types.
# Two-character strings begin with an upper-case letter which is
# usually the same as the package name.
#
%TypeString = ("haddr_t"                    => "a",
               "hbool_t"                    => "b",
               "double"                     => "d",
               "H5D_alloc_time_t"           => "Da",
               "H5FD_mpio_collective_opt_t" => "Dc",
               "H5D_fill_time_t"            => "Df",
               "H5D_fill_value_t"           => "DF",
               "H5FD_mpio_chunk_opt_t"      => "Dh",
               "H5D_mpio_actual_io_mode_t"  => "Di",
               "H5D_chunk_index_t"          => "Dk",
               "H5D_layout_t"               => "Dl",
               "H5D_mpio_no_collective_cause_t" => "Dn",
               "H5D_mpio_actual_chunk_opt_mode_t" => "Do",
               "H5D_space_status_t"         => "Ds",
               "H5D_vds_view_t"             => "Dv",
               "H5FD_mpio_xfer_t"           => "Dt",
               "herr_t"                     => "e",
               "H5E_direction_t"            => "Ed",
               "H5E_error_t"                => "Ee",
               "H5E_type_t"                 => "Et",
               "H5ES_status_t"              => "Es",
               "H5F_close_degree_t"         => "Fd",
               "H5F_fspace_strategy_t"      => "Ff",
               "H5F_file_space_type_t"      => "Ff",
               "H5F_mem_t"                  => "Fm",
               "H5F_scope_t"                => "Fs",
               "H5F_fspace_type_t"          => "Ft",
               "H5F_libver_t"               => "Fv",
               "H5G_obj_t"                  => "Go",
               "H5G_stat_t"                 => "Gs",
               "hsize_t"                    => "h",
               "hssize_t"                   => "Hs",
               "H5E_major_t"                => "i",
               "H5E_minor_t"                => "i",
               "H5_iter_order_t"            => "Io",
               "H5_index_t"                 => "Ii",
               "hid_t"                      => "i",
               "int"                        => "Is",
               "int32_t"                    => "Is",
               "unsigned"                   => "Iu",
               "unsigned int"               => "Iu",
               "uint32_t"                   => "Iu",
               "uint64_t"                   => "UL",
               "H5I_type_t"                 => "It",
               "H5G_link_t"                 => "Ll", #Same as H5L_type_t now
               "H5L_type_t"                 => "Ll",
               "MPI_Comm"                   => "Mc",
               "MPI_Info"                   => "Mi",
               "H5FD_mem_t"                 => "Mt",
               "off_t"                      => "o",
               "H5O_type_t"                 => "Ot",
               "H5P_class_t"                => "p",
               "hobj_ref_t"                 => "r",
               "H5R_type_t"                 => "Rt",
               "char"                       => "s",
               "unsigned char"              => "s",
               "H5S_class_t"                => "Sc",
               "H5S_seloper_t"              => "Ss",
               "H5S_sel_type"               => "St",
               "htri_t"                     => "t",
               "H5T_cset_t",                => "Tc",
               "H5T_direction_t",           => "Td",
               "H5T_norm_t"                 => "Tn",
               "H5T_order_t"                => "To",
               "H5T_pad_t"                  => "Tp",
               "H5T_pers_t"                 => "Te",
               "H5T_sign_t"                 => "Ts",
               "H5T_class_t"                => "Tt",
               "H5T_str_t"                  => "Tz",
               "unsigned long"              => "Ul",
               "unsigned long long"         => "UL",
               "H5VL_attr_get_t"            => "Va",
               "H5VL_attr_specific_t"       => "Vb",
               "H5VL_class_value_t"         => "VC",
               "H5VL_dataset_get_t"         => "Vc",
               "H5VL_dataset_specific_t"    => "Vd",
               "H5VL_datatype_get_t"        => "Ve",
               "H5VL_datatype_specific_t"   => "Vf",
               "H5VL_file_get_t"            => "Vg",
               "H5VL_file_specific_t"       => "Vh",
               "H5VL_group_get_t"           => "Vi",
               "H5VL_group_specific_t"      => "Vj",
               "H5VL_link_create_type_t"    => "Vk",
               "H5VL_link_get_t"            => "Vl",
               "H5VL_link_specific_t"       => "Vm",
               "H5VL_object_get_t"          => "Vn",
               "H5VL_object_specific_t"     => "Vo",
               "H5VL_request_specific_t"    => "Vr",
               "void"                       => "x",
               "FILE"                       => "x",
               "H5A_operator_t"             => "x",
               "H5A_operator1_t"            => "x",
               "H5A_operator2_t"            => "x",
               "H5A_info_t"                 => "x",
               "H5AC_cache_config_t"        => "x",
               "H5AC_cache_image_config_t"  => "x",
               "H5D_append_cb_t"            => "x",
               "H5D_gather_func_t"          => "x",
               "H5D_operator_t"             => "x",
               "H5D_scatter_func_t"         => "x",
               "H5E_auto_t"                 => "x",
               "H5E_auto1_t"                => "x",
               "H5E_auto2_t"                => "x",
               "H5E_walk_t"                 => "x",
               "H5E_walk1_t"                => "x",
               "H5E_walk2_t"                => "x",
               "H5F_flush_cb_t"             => "x",
               "H5F_info1_t"                => "x",
               "H5F_info2_t"                => "x",
               "H5F_retry_info_t"           => "x",
               "H5FD_t"                     => "x",
               "H5FD_class_t"               => "x",
               "H5FD_stream_fapl_t"         => "x",
               "H5FD_ros3_fapl_t"           => "x",
               "H5FD_hdfs_fapl_t"           => "x",
               "H5FD_file_image_callbacks_t" => "x",
               "H5G_iterate_t"              => "x",
               "H5G_info_t"                 => "x",
               "H5I_free_t"                 => "x",
               "H5I_iterate_func_t"         => "x",
               "H5I_search_func_t"          => "x",
               "H5L_class_t"                => "x",
               "H5L_elink_traverse_t"       => "x",
               "H5L_iterate_t"              => "x",
               "H5M_iterate_t"              => 'x',
               "H5MM_allocate_t"            => "x",
               "H5MM_free_t"                => "x",
               "H5O_info_t"                 => "x",
               "H5O_iterate_t"              => "x",
               "H5O_mcdt_search_cb_t"       => "x",
               "H5P_cls_create_func_t"      => "x",
               "H5P_cls_copy_func_t"        => "x",
               "H5P_cls_close_func_t"       => "x",
               "H5P_iterate_t"              => "x",
               "H5P_prp_create_func_t"      => "x",
               "H5P_prp_copy_func_t"        => "x",
               "H5P_prp_close_func_t"       => "x",
               "H5P_prp_delete_func_t"      => "x",
               "H5P_prp_get_func_t"         => "x",
               "H5P_prp_set_func_t"         => "x",
               "H5P_prp_compare_func_t"     => "x",
               "H5T_cdata_t"                => "x",
               "H5T_conv_t"                 => "x",
               "H5T_conv_except_func_t"     => "x",
               "H5VL_t"                     => "x",
               "H5VL_class_t"               => "x",
               "H5VL_loc_params_t"          => "x",
               "H5VL_request_notify_t"      => "x",
               "H5Z_func_t"                 => "x",
               "H5Z_filter_func_t"          => "x",
               "va_list"                    => "x",
               "size_t"                     => "z",
               "H5Z_SO_scale_type_t"        => "Za",
               "H5Z_class_t"                => "Zc",
               "H5Z_EDC_t"                  => "Ze",
               "H5Z_filter_t"               => "Zf",
               "ssize_t"                    => "Zs",
              );

##############################################################################
# Print an error message.
#
my $found_errors = 0;

sub errmesg ($$@) {
  my ($file, $func, @mesg) = @_;
  my ($mesg) = join "", @mesg;
  my ($lineno) = 1;
  if ($Source =~ /(.*?\n)($func)/s) {
    local $_ = $1;
    $lineno = tr/\n/\n/;
  }

  $found_errors = 1;

  print "$file: in function \`$func\':\n";
  print "$file:$lineno: $mesg\n";
}

##############################################################################
# Given a C data type return the type string that goes with it.
#
sub argstring ($$$) {
  my ($file, $func, $atype) = @_;
  my ($ptr, $tstr, $array) = (0, "!", "");
  my ($fq_atype);

  # Normalize the data type by removing redundant white space,
  # certain type qualifiers, and indirection.
  $atype =~ s/^\bconst\b//;
  $atype =~ s/\bH5_ATTR_UNUSED\b//g;
  $atype =~ s/\s+/ /g;
  $ptr = length $1 if  $atype =~ s/(\*+)//;
  $atype =~ s/^\s+//;
  $atype =~ s/\s+$//;
  if ($atype =~ /(.*)\[(.*)\]$/) {
    ($array, $atype) = ($2, $1);
    $atype =~ s/\s+$//;
  }
  $fq_atype = $atype . ('*' x $ptr);

  if ($ptr>0 && exists $TypeString{$fq_atype}) {
    $ptr = 0;
    $tstr = $TypeString{$fq_atype};
  } elsif ($ptr>0 && exists $TypeString{"$atype*"}) {
    --$ptr;
    $tstr = $TypeString{"$atype*"};
  } elsif (!exists $TypeString{$atype}) {
    errmesg $file, $func, "untraceable type \`$atype", '*'x$ptr, "\'";
  } else {
    $tstr = $TypeString{$atype};
  }
  return ("*" x $ptr) . ($array?"[$array]":"") . $tstr;
}

##############################################################################
# Given information about an API function, rewrite that function with
# updated tracing information.
#
sub rewrite_func ($$$$$) {
  my ($file, $type, $name, $args, $body) = @_;
  my ($arg,$trace);
  my (@arg_name, @arg_str);
  local $_;

  # Parse return value
  my $rettype = argstring $file, $name, $type;
  goto error if $rettype =~ /!/;

  # Parse arguments
  if ($args eq "void") {
    $trace = "H5TRACE0(\"$rettype\",\"\");\n";
  } else {
    # Split arguments.  First convert `/*in,out*/' to get rid of the
    # comma, then split the arguments on commas.
    $args =~ s/(\/\*\s*in),\s*(out\s*\*\/)/$1_$2/g;
    my @args = split /,[\s\n]*/, $args;
    my $argno = 0;
    my %names;

    for $arg (@args) {
      if($arg=~/\w*\.{3}\w*/){
        next;
      }
      unless ($arg=~/^(([a-z_A-Z]\w*\s+)+\**)
              ([a-z_A-Z]\w*)(\[.*?\])?
              (\s*\/\*\s*(in|out|in_out)\s*\*\/)?\s*$/x) {
        errmesg $file, $name, "unable to parse \`$arg\'";
        goto error;
      } else {
        my ($atype, $aname, $array, $adir) = ($1, $3, $4, $6);
        $names{$aname} = $argno++;
        $adir ||= "in";
        $atype =~ s/\s+$//;
        push @arg_name, $aname;

        if ($adir eq "out") {
          push @arg_str, "x";
        } else {
          if (defined $array) {
            $atype .= "*";
            if ($array =~ /^\[\/\*([a-z_A-Z]\w*)\*\/\]$/) {
              my $asize = $1;
              if (exists $names{$asize}) {
                $atype .= '[a' . $names{$asize} . ']';
              } else {
                warn "bad array size: $asize";
                $atype .= "*";
              }
            }
          }
          push @arg_str, argstring $file, $name, $atype;
        }
      }
    }
    $trace = "H5TRACE" . scalar(@arg_str) . "(\"$rettype\", \"";
    $trace .= join("", @arg_str) . "\"";
    my $len = 4 + length $trace;
    for (@arg_name) {
      if ($len + length >= 77) {
        $trace .= ",\n             $_";
        $len = 13 + length;
      } else {
        $trace .= ", $_";
        $len += 1 + length;
      }
    }
    $trace .= ");\n";
  }
  goto error if grep {/!/} @arg_str;

  # The H5TRACE() statement
  if ($body =~ /\/\*[ \t]*NO[ \t]*TRACE[ \t]*\*\//) {
    # Ignored due to NO TRACE comment.
  } elsif ($body =~ s/((\n[ \t]*)H5TRACE\d+\s*\(.*?\);)\n/"$2$trace"/es) {
    # Replaced an H5TRACE macro.
  } elsif ($body=~s/((\n[ \t]*)FUNC_ENTER\w*[ \t]*(\(.*?\))?;??)\n/"$1$2$trace"/es) {
    # Added an H5TRACE macro after a FUNC_ENTER macro.
  } else {
    errmesg $file, $name, "unable to insert tracing information";
    print "body = ", $body, "\n";
    goto error;
  }

  
 error:
  return "\n$type\n$name($args)\n$body";
}

##############################################################################
# Process each source file, rewriting API functions with updated
# tracing information.
#
my $total_api = 0;
for $file (@ARGV) {
  # Ignore some files that do not need tracing macros
  unless ($file eq "H5FDmulti.c" or $file eq "src/H5FDmulti.c" or $file eq "H5FDstdio.c" or $file eq "src/H5FDstdio.c") {
  
    # Snarf up the entire file
    open SOURCE, $file or die "$file: $!\n";
    $Source = join "", <SOURCE>;
    close SOURCE;

    # Make modifications
    my $original = $Source;
    my $napi = $Source =~ s/\n([A-Za-z]\w*(\s+[A-Za-z]\w*)*\s*\**)\n #type
                              (H5[A-Z]{0,2}[^_A-Z0-9]\w*)      #name
                              \s*\((.*?)\)\s*               #args
                              (\{.*?\n\}[^\n]*)             #body
                           /rewrite_func($file,$1,$3,$4,$5)/segx;
    $total_api += $napi;

# If the source changed then print out the new version
    if ($original ne $Source) {
      printf "%s: instrumented %d API function%s\n", 
             $file, $napi, 1==$napi?"":"s";
      rename $file, "$file~" or die "unable to make backup";
      open SOURCE, ">$file" or die "unable to modify source";
      print SOURCE $Source;
      close SOURCE;
    }
  }
}

if ($found_errors eq 1) {
    printf "\n";
    printf "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n";
    printf "*** ERRORS FOUND *** ERRORS FOUND *** ERRORS FOUND ****\n";
    printf "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n";
    exit 1;
} else {
    printf "Finished processing HDF5 API calls\n";
}