#!/usr/bin/perl -w
require 5.003;
$Source = "";

##############################################################################
# A map from type name to type letter.  We use this map for two reasons:
#  1. We want the debugging stuff in the source code to be as unobtrusive as
#     possible, which means as compact as possible.
#  2. It's easier (faster) to parse these one and two-letter types in the C
#     functions that display debugging results.
#
# All type strings are one or two characters.  One-character strings
# are always lower case and should be used for common types.
# Two-character strings begin with an upper-case letter which is
# usually the same as the package name.
#
%TypeString = ("haddr_t"                    => "a",
               "hbool_t"                    => "b",
               "htri_t"                     => "b",
               "double"                     => "d",
               "H5D_alloc_time_t"           => "Da",
               "H5D_fill_time_t"            => "Df",
               "H5D_fill_value_t"           => "DF",
               "H5D_layout_t"               => "Dl",
               "H5D_space_status_t"         => "Ds",
               "H5FD_mpio_xfer_t"           => "Dt",
               "herr_t"                     => "e",
               "H5E_direction_t"            => "Ed",
               "H5E_error_t*"               => "Ee",
               "H5E_major_t"                => "i",
               "H5E_minor_t"                => "i",
               "H5E_type_t"                 => "Et",
               "H5F_close_degree_t"         => "Fd",
               "H5F_scope_t"                => "Fs",
               "H5FD_t*"                    => "x",
               "H5FD_class_t*"              => "x",
               "H5FD_stream_fapl_t*"        => "x",
               "H5G_link_t"                 => "Gl",
               "H5G_obj_t"                  => "Go",
               "H5G_stat_t*"                => "Gs",
               "hsize_t"                    => "h",
               "hssize_t"                   => "Hs",
               "hid_t"                      => "i",
               "int"                        => "Is",
               "unsigned"                   => "Iu",
               "unsigned int"               => "Iu",
               "H5I_type_t"                 => "It",
               "MPI_Comm"                   => "Mc",
               "MPI_Info"                   => "Mi",
               "H5FD_mem_t"                 => "Mt",
               "off_t"                      => "o",
               "H5P_class_t"                => "p",
               "href_t"                     => "r",
               "H5R_type_t"                 => "Rt",
               "char*"                      => "s",
               "H5S_class_t"                => "Sc",
               "H5S_seloper_t"              => "Ss",
               "H5S_sel_type"               => "St",
               "H5T_cset_t",                => "Tc",
               "H5T_direction_t",           => "Td",
               "H5T_norm_t"                 => "Tn",
               "H5T_order_t"                => "To",
               "H5T_pad_t"                  => "Tp",
               "H5T_pers_t"                 => "Te",
               "H5T_sign_t"                 => "Ts",
               "H5T_class_t"                => "Tt",
               "H5T_str_t"                  => "Tz",
               "void*"                      => "x",
               "void**"                     => "x",
               "FILE*"                      => "x",
               "H5A_operator_t"             => "x",
               "H5D_operator_t"             => "x",
               "H5E_auto_t"                 => "x",
               "H5E_walk_t"                 => "x",
               "H5G_iterate_t"              => "x",
               "H5MM_allocate_t"            => "x",
               "H5MM_free_t"                => "x",
               "H5P_cls_create_func_t"      => "x",
               "H5P_cls_copy_func_t"        => "x",
               "H5P_cls_close_func_t"       => "x",
               "H5P_iterate_t"              => "x",
               "H5P_prp_create_func_t"      => "x",
               "H5P_prp_copy_func_t"        => "x",
               "H5P_prp_close_func_t"       => "x",
               "H5P_prp_delete_func_t"      => "x",
               "H5P_prp_get_func_t"         => "x",
               "H5P_prp_set_func_t"         => "x",
               "H5T_cdata_t**"              => "x",
               "H5T_conv_t"                 => "x",
               "H5T_overflow_t"             => "x",
               "H5Z_func_t"                 => "x",
               "H5Z_filter_func_t"          => "x",
               "size_t"                     => "z",
               "H5Z_class_t*"               => "Zc",
               "H5Z_EDC_t"                  => "Ze",
               "H5Z_filter_t"               => "Zf",
               "ssize_t"                    => "Zs",
              );

##############################################################################
# Print an error message.
#
sub errmesg ($$@) {
  my ($file, $func, @mesg) = @_;
  my ($mesg) = join "", @mesg;
  my ($lineno) = 1;
  if ($Source =~ /(.*?\n)($func)/s) {
    local $_ = $1;
    $lineno = tr/\n/\n/;
  }

  print "$file: in function \`$func\':\n";
  print "$file:$lineno: $mesg\n";
}

##############################################################################
# Given a C data type return the type string that goes with it.
#
sub argstring ($$$) {
  my ($file, $func, $atype) = @_;
  my ($ptr, $tstr, $array) = (0, "!", "");
  my ($fq_atype);

  # Normalize the data type by removing redundant white space,
  # certain type qualifiers, and indirection.
  $atype =~ s/^\bconst\b//;
  $atype =~ s/\bUNUSED\b//g;
  $atype =~ s/\s+/ /g;
  $ptr = length $1 if  $atype =~ s/(\*+)//;
  $atype =~ s/^\s+//;
  $atype =~ s/\s+$//;
  if ($atype =~ /(.*)\[(.*)\]$/) {
    ($array, $atype) = ($2, $1);
    $atype =~ s/\s+$//;
  }
  $fq_atype = $atype . ('*' x $ptr);

  if ($ptr>0 && exists $TypeString{$fq_atype}) {
    $ptr = 0;
    $tstr = $TypeString{$fq_atype};
  } elsif ($ptr>0 && exists $TypeString{"$atype*"}) {
    --$ptr;
    $tstr = $TypeString{"$atype*"};
  } elsif (!exists $TypeString{$atype}) {
    errmesg $file, $func, "untraceable type \`$atype", '*'x$ptr, "\'";
  } else {
    $tstr = $TypeString{$atype};
  }
  return ("*" x $ptr) . ($array?"[$array]":"") . $tstr;
}

##############################################################################
# Given information about an API function, rewrite that function with
# updated tracing information.
#
sub rewrite_func ($$$$$) {
  my ($file, $type, $name, $args, $body) = @_;
  my ($arg,$trace);
  my (@arg_name, @arg_str);
  local $_;

  # Parse return value
  my $rettype = argstring $file, $name, $type;
  goto error if $rettype =~ /!/;

  # Parse arguments
  if ($args eq "void") {
    $trace = "H5TRACE0(\"$rettype\",\"\");\n";
  } else {
    # Split arguments.  First convert `/*in,out*/' to get rid of the
    # comma, then split the arguments on commas.
    $args =~ s/(\/\*\s*in),\s*(out\s*\*\/)/$1_$2/g;
    my @args = split /,[\s\n]*/, $args;
    my $argno = 0;
    my %names;

    for $arg (@args) {
      unless ($arg=~/^(([a-z_A-Z]\w*\s+)+\**)
	      ([a-z_A-Z]\w*)(\[.*?\])?
	      (\s*\/\*\s*(in|out|in_out)\s*\*\/)?\s*$/x) {
	errmesg $file, $name, "unable to parse \`$arg\'";
	goto error;
      } else {
	my ($atype, $aname, $array, $adir) = ($1, $3, $4, $6);
	$names{$aname} = $argno++;
	$adir ||= "in";
	$atype =~ s/\s+$//;
	push @arg_name, $aname;

	if ($adir eq "out") {
	  push @arg_str, "x";
	} else {
	  if (defined $array) {
	    $atype .= "*";
	    if ($array =~ /^\[\/\*([a-z_A-Z]\w*)\*\/\]$/) {
	      my $asize = $1;
	      if (exists $names{$asize}) {
		$atype .= '[a' . $names{$asize} . ']';
	      } else {
		warn "bad array size: $asize";
		$atype .= "*";
	      }
	    }
	  }
	  push @arg_str, argstring $file, $name, $atype;
	}
      }
    }
    $trace = "H5TRACE" . scalar(@arg_str) . "(\"$rettype\",\"";
    $trace .= join("", @arg_str) . "\"";
    my $len = 4 + length $trace;
    for (@arg_name) {
      if ($len + length >= 77) {
	$trace .= ",\n             $_";
	$len = 13 + length;
      } else {
	$trace .= ",$_";
	$len += 1 + length;
      }
    }
    $trace .= ");\n";
  }
  goto error if grep {/!/} @arg_str;

  # The H5TRACE() statement
  if ($body =~ /\/\*[ \t]*NO[ \t]*TRACE[ \t]*\*\//) {
    if ($body =~ /\s*H5TRACE\d+\s*\(/) {
      errmesg $file, $name, "warning: trace info was not updated";
    } else {
      errmesg $file, $name, "warning: trace info was not inserted";
    }
  } elsif ($body =~ s/((\n[ \t]*)H5TRACE\d+\s*\(.*?\);)\n/"$2$trace"/es) {
    # Replaced an H5TRACE macro
  } elsif ($body=~s/((\n[ \t]*)FUNC_ENTER\w*\s*\(.*?\);??)\n/"$1$2$trace"/es) {
    # Added an H5TRACE macro after a FUNC_ENTER macro.
  } else {
    errmesg $file, $name, "unable to insert tracing information";
    print "body = ", $body, "\n";
    goto error;
  }

  
 error:
  return "\n$type\n$name($args)\n$body";
}

##############################################################################
# Process each source file, rewriting API functions with updated
# tracing information.
#
my $total_api = 0;
for $file (@ARGV) {
  # Snarf up the entire file
  open SOURCE, $file or die "$file: $!\n";
  $Source = join "", <SOURCE>;
  close SOURCE;

  # Make modifications
  my $original = $Source;
  my $napi = $Source =~ s/\n([A-Za-z]\w*(\s+[a-z]\w*)*)\s*\n #type
                            (H5[A-Z]{0,2}[^_A-Z]\w*)      #name
                            \s*\((.*?)\)\s*               #args
                            (\{.*?\n\}[^\n]*)             #body
                         /rewrite_func($file,$1,$3,$4,$5)/segx;
  $total_api += $napi;
  
  # If the source changed then print out the new version
  if ($original ne $Source) {
    printf "%s: instrumented %d API function%s\n", 
           $file, $napi, 1==$napi?"":"s";
    rename $file, "$file~" or die "unable to make backup";
    open SOURCE, ">$file" or die "unable to modify source";
    print SOURCE $Source;
    close SOURCE;
  }
}