#! /bin/sh
#
# Copyright by the Board of Trustees of the University of Illinois.
# All rights reserved.
#
# This file is part of HDF5.  The full HDF5 copyright notice, including
# terms governing use, modification, and redistribution, is contained in
# the files COPYING and Copyright.html.  COPYING can be found at the root
# of the source code distribution tree; Copyright.html can be found at the
# root level of an installed copy of the electronic HDF5 document set and
# is linked from the top-level documents page.  It can also be found at
# http://hdf.ncsa.uiuc.edu/HDF5/doc/Copyright.html.  If you do not have
# access to either file, you may request a copy from hdfhelp@ncsa.uiuc.edu.
#

# Check Copyright notice.
# Check that all the files have the proper copyright notice.
# It goes down directories recursively.
#
# Programmer: Albert Cheng
# Created Date: 2003/07/22
# Modification:
#	Rewrote most of it. Albert Cheng, 2005/10/10.

# Setup
#
# All scratch files live in one private directory so that other users of
# /tmp can neither predict nor pre-create (e.g., as symlinks) the paths this
# script writes to.
#
PROGNAME=$0
DIFF="diff"
INITFILE=.h5chkright.ini

# Prefer mktemp; on systems without it fall back to mkdir, which is atomic
# and fails if the name already exists.
H5CHK_TMPDIR=`mktemp -d /tmp/h5chkright.XXXXXX 2>/dev/null`
if [ -z "$H5CHK_TMPDIR" ] || [ ! -d "$H5CHK_TMPDIR" ]; then
    H5CHK_TMPDIR=/tmp/h5chkright.$$
    if (umask 077 && mkdir "$H5CHK_TMPDIR"); then
        :
    else
        echo "$PROGNAME: cannot create temporary directory $H5CHK_TMPDIR" >&2
        exit 1
    fi
fi
# Remove the scratch directory however the script ends.  Signals are turned
# into a normal exit so that the exit trap also runs for them.
trap 'rm -rf "$H5CHK_TMPDIR"' 0
trap 'exit 1' 1 2 15

EXCEPTIONS=$H5CHK_TMPDIR/h5chkright.except
tmpfile=$H5CHK_TMPDIR/h5chkright_tmp
EXCEPTIONDIRS="-name CVS"	# at least skip CVS directories.
EXTRACTEDFILE=$H5CHK_TMPDIR/h5chkright.extracted
VERBOSE=		# default no
DIRS=.			# default current directory
NFAILEDFILES=0		# Number of failed files found.
NPASSEDFILES=0		# Number of passed files found.
NUMBEGINLINES=60	# Copyright notice should be located within
			# this number of lines at the beginning of the file.
COPYRIGHTSTR="Copyright by the Board of Trustees of the University of Illinois"

PASSEDLOG=$H5CHK_TMPDIR/h5chkright_passed
SKIPPEDLOG=$H5CHK_TMPDIR/h5chkright_skipped
FAILEDLOG=$H5CHK_TMPDIR/h5chkright_failed

C_COPYRIGHT=$H5CHK_TMPDIR/h5chkright_C		# C style copyright
FTN_COPYRIGHT=$H5CHK_TMPDIR/h5chkright_FTN	# Fortran style copyright
HTM_COPYRIGHT=$H5CHK_TMPDIR/h5chkright_HTM	# HTML style copyright
SH_COPYRIGHT=$H5CHK_TMPDIR/h5chkright_SH	# SHELL style copyright
SH_COPYRIGHT2=$H5CHK_TMPDIR/h5chkright_SH2	# SHELL style copyright, 2nd style.
WINBAT_COPYRIGHT=$H5CHK_TMPDIR/h5chkright_WINBAT	# Windows Batch file Copyright notice


# Help page
#
# Print the command-line synopsis and option summary to stdout.
# Uses $PROGNAME as the program name shown in the synopsis.
#
USAGE()
{
    cat <<EOF
Usage: $PROGNAME [-h | -help] [-fname name-pattern] [-v | -v9] [dir1 dir2 ...]
    Check copyright notices of files in [dir1 dir2 ...].
    Default is to check files in current directory.
    -h | -help
	show this page.
    -fname name-pattern
        limit to files of name-pattern
    -v
	verbose mode
    -v9
	highly verbose
EOF
}


# Generate various styles of Copyright notices
#
# Writes one reference notice per supported comment style into the files
# named by $C_COPYRIGHT, $FTN_COPYRIGHT, $HTM_COPYRIGHT, $SH_COPYRIGHT,
# $SH_COPYRIGHT2 and $WINBAT_COPYRIGHT.  The here-documents use quoted
# delimiters so the text is written literally, with no expansion.
#
BUILDCOPYRIGHT()
{
    # Notice as it appears inside a C or C++ comment box.
    cat <<'C_NOTICE' > "${C_COPYRIGHT}"
 * Copyright by the Board of Trustees of the University of Illinois.         *
 * All rights reserved.                                                      *
 *                                                                           *
 * This file is part of HDF5.  The full HDF5 copyright notice, including     *
 * terms governing use, modification, and redistribution, is contained in    *
 * the files COPYING and Copyright.html.  COPYING can be found at the root   *
 * of the source code distribution tree; Copyright.html can be found at the  *
 * root level of an installed copy of the electronic HDF5 document set and   *
 * is linked from the top-level documents page.  It can also be found at     *
 * http://hdf.ncsa.uiuc.edu/HDF5/doc/Copyright.html.  If you do not have     *
 * access to either file, you may request a copy from hdfhelp@ncsa.uiuc.edu. *
C_NOTICE

    # Notice as it appears in Fortran 9X sources ("!" comments).
    cat <<'FTN_NOTICE' > "${FTN_COPYRIGHT}"
!   Copyright by the Board of Trustees of the University of Illinois.         *
!   All rights reserved.                                                      *
!                                                                             *
!   This file is part of HDF5.  The full HDF5 copyright notice, including     *
!   terms governing use, modification, and redistribution, is contained in    *
!   the files COPYING and Copyright.html.  COPYING can be found at the root   *
!   of the source code distribution tree; Copyright.html can be found at the  *
!   root level of an installed copy of the electronic HDF5 document set and   *
!   is linked from the top-level documents page.  It can also be found at     *
!   http://hdf.ncsa.uiuc.edu/HDF5/doc/Copyright.html.  If you do not have     *
!   access to either file, you may request a copy from hdfhelp@ncsa.uiuc.edu. *
FTN_NOTICE

    # Notice as it appears in HTML files.
    cat <<'HTM_NOTICE' > "${HTM_COPYRIGHT}"
  * Copyright by the Board of Trustees of the University of Illinois.         *
  * All rights reserved.                                                      *
  *                                                                           *
  * This file is part of HDF5.  The full HDF5 copyright notice, including     *
  * terms governing use, modification, and redistribution, is contained in    *
  * the files COPYING and Copyright.html.  COPYING can be found at the root   *
  * of the source code distribution tree; Copyright.html can be found at the  *
  * root level of an installed copy of the electronic HDF5 document set and   *
  * is linked from the top-level documents page.  It can also be found at     *
  * http://hdf.ncsa.uiuc.edu/HDF5/doc/Copyright.html.  If you do not have     *
  * access to either file, you may request a copy from hdfhelp@ncsa.uiuc.edu. *
HTM_NOTICE

    # Notice for shell-like files, preferred style (single leading #).
    cat <<'SH_NOTICE' > "${SH_COPYRIGHT}"
# Copyright by the Board of Trustees of the University of Illinois.
# All rights reserved.
#
# This file is part of HDF5.  The full HDF5 copyright notice, including
# terms governing use, modification, and redistribution, is contained in
# the files COPYING and Copyright.html.  COPYING can be found at the root
# of the source code distribution tree; Copyright.html can be found at the
# root level of an installed copy of the electronic HDF5 document set and
# is linked from the top-level documents page.  It can also be found at
# http://hdf.ncsa.uiuc.edu/HDF5/doc/Copyright.html.  If you do not have
# access to either file, you may request a copy from hdfhelp@ncsa.uiuc.edu.
SH_NOTICE

    # Notice for shell-like files, alternate style (double leading #).
    cat <<'SH2_NOTICE' > "${SH_COPYRIGHT2}"
## Copyright by the Board of Trustees of the University of Illinois.
## All rights reserved.
##
## This file is part of HDF5.  The full HDF5 copyright notice, including
## terms governing use, modification, and redistribution, is contained in
## the files COPYING and Copyright.html.  COPYING can be found at the root
## of the source code distribution tree; Copyright.html can be found at the
## root level of an installed copy of the electronic HDF5 document set and
## is linked from the top-level documents page.  It can also be found at
## http://hdf.ncsa.uiuc.edu/HDF5/doc/Copyright.html.  If you do not have
## access to either file, you may request a copy from hdfhelp@ncsa.uiuc.edu.
SH2_NOTICE

    # Notice for Windows batch files (@REM comments).
    cat <<'WINBAT_NOTICE' > "${WINBAT_COPYRIGHT}"
@REM Copyright by the Board of Trustees of the University of Illinois.
@REM All rights reserved.
@REM
@REM This file is part of HDF5.  The full HDF5 copyright notice, including
@REM terms governing use, modification, and redistribution, is contained in
@REM the files COPYING and Copyright.html.  COPYING can be found at the root
@REM of the source code distribution tree; Copyright.html can be found at the
@REM root level of an installed copy of the electronic HDF5 document set and
@REM is linked from the top-level documents page.  It can also be found at
@REM http://hdf.ncsa.uiuc.edu/HDF5/doc/Copyright.html.  If you do not have
@REM access to either file, you may request a copy from hdfhelp@ncsa.uiuc.edu.
WINBAT_NOTICE

}

# Initialization
#
# Removes stale log files, generates the reference copyright notices, and
# reads the optional settings file $INITFILE from the current directory.
#
# Initialization file format:
# Each line is a keyword for an action followed by its value.
# Keywords:
# '#'           Comments
# skip          Files to be skipped.  The value is appended to $EXCEPTIONS,
#               which is later used as a list of extended regular
#               expressions matched against each pathname.
# prune         Directories to be skipped.  Notice this prunes all directories
#               with the same name.  E.g.,
#               "prune test" skips test, fortran/test, c++/test, ...
#               (See -name option in the find command.)
# prunepath     Directory or file to be skipped.  Notice this is different from
#               prune since it matches the exact pathname. E.g.,
#               "prunepath ./tools/testfiles" skips the directory/file matching
#               exactly that path but NOT tools/h5dump/testfiles nor any
#               other directory that merely ends in testfiles.
#               (See -path option in the find command.)
#
# On return, $EXCEPTIONDIRS holds a complete "( ... ) -prune -o" expression
# ready to be placed in a find command line.
#
INITIALIZATION()
{
    # clean up log files
    rm -f "$PASSEDLOG" "$SKIPPEDLOG" "$FAILEDLOG"

    # Generate various styles of copyright notice.
    BUILDCOPYRIGHT

    echo Initialization...
    # Start with an empty list of skip patterns.
    cp /dev/null "$EXCEPTIONS"

    # Process Initial setting file if exists
    if [ -r "$INITFILE" ]; then
        # read -r keeps backslashes (common in regular expressions) intact.
        # The "|| [ -n ... ]" clause still processes a final line that has
        # no terminating newline.
        while read -r key value || [ -n "$key" ]; do
            case "$key" in
            \#* | '')   # Comment or blank line, skip it
                continue
                ;;
            skip | prune | prunepath)
                if [ -z "$value" ]; then
                    # An empty skip pattern would match every file and an
                    # empty prune value would corrupt the find expression.
                    echo "missing value for '$key' in file $INITFILE, ignored"
                    continue
                fi
                printf '%s %s\n' "$key" "$value"
                case "$key" in
                skip)
                    # Quoted so that the pattern is stored verbatim rather
                    # than glob-expanded against the current directory.
                    printf '%s\n' "$value" >> "$EXCEPTIONS"
                    ;;
                prune)
                    EXCEPTIONDIRS="$EXCEPTIONDIRS -o -name $value"
                    ;;
                prunepath)
                    EXCEPTIONDIRS="$EXCEPTIONDIRS -o -path $value"
                    ;;
                esac
                ;;
            *)
                echo unknown setting input in file $INITFILE
                printf '%s %s\n' "$key" "$value"
                ;;
            esac
        done < "$INITFILE"
    fi

    # Change EXCEPTIONDIRS to be compatible with find command.
    EXCEPTIONDIRS="( $EXCEPTIONDIRS ) -prune -o"
    echo Initialization done
}


# Parse Options
#
# Arguments: the script's command-line arguments ("$@").
# Sets:
#   FNAME    name pattern given with -fname
#   VERBOSE  "yes" for any -v* option; -v9 additionally turns on "set -x"
#   DIRS     every remaining argument once the first non-option is seen
# Exits 0 after printing the help page for -h/-help, and exits 1 (after
# printing the help page) for an unknown option or a -fname that has no
# argument.
#
PARSE_OPTION()
{
    while test $# -gt 0 ; do
        case "$1" in
        -h | -help )
            USAGE
            exit 0
            ;;
        -fname )
            # Without this check the shift below would run past the end of
            # the argument list and silently leave an empty pattern.
            if test $# -lt 2; then
                echo "***Option -fname requires a name-pattern argument"
                USAGE
                exit 1
            fi
            shift
            FNAME="$1"
            ;;
        -v* )
            VERBOSE=yes
            if test "X$1" = X-v9; then
                set -x
            fi
            ;;
        -* )
            echo "***Unknown option ($1)"
            USAGE
            exit 1
            ;;
        * )
            # First non-option: it and everything after it are directories.
            DIRS=$*
            break
            ;;
        esac
        shift
    done
}


# Rinse the file by,
#    removing all \r which is often present in Windows files;
#    replacing tabs with equivalent spaces;
#    removing all trailing spaces.
# $1 is the file to be rinsed; it is rewritten in place.
# Uses $tmpfile as scratch space.  Returns non-zero if the scratch copy
# cannot be made, in which case $1 is left untouched.
RINSE()
{
    rf=$1
    cp "$rf" "$tmpfile" || return 1
    # tr is used instead of dos2unix: it is available everywhere, whereas a
    # missing dos2unix would leave the rinsed file empty.
    tr -d '\r' < "$tmpfile" | expand | sed -e 's/ *$//' > "$rf"
}


# Match Copyright notice.
# $1 file which contains the expected copyright notice.
# $2 file in which to look for the copyright notice.
# Copyright notice must be found within the beginning $NUMBEGINLINES of lines.
# Hunt for the particular string $COPYRIGHTSTR which signifies the beginning
# of the copyright notice.
#
# Prints PASSED or FAILED on stdout.  The text extracted from $2 is left in
# $EXTRACTEDFILE (after being rinsed) so that a caller can show a diff.
#
MATCH_COPYRIGHT()
{
    if [ $# -ne 2 ]; then
        # expect two arguments
        echo FAILED
        return
    fi
    COPYRIGHTFILE=$1
    f=$2
    # Must use stdin for wc to prevent filename from popping up.
    nlines=`wc -l < "${COPYRIGHTFILE}" | tr -d ' '`
    # Take everything from the first line containing $COPYRIGHTSTR onward,
    # then keep only as many lines as the expected notice has.
    head -n "$NUMBEGINLINES" < "$f" | sed -n -e "/${COPYRIGHTSTR}/"',$p' |
        head -n "${nlines}" > "${EXTRACTEDFILE}"
    RINSE "${EXTRACTEDFILE}"
    if $DIFF "${EXTRACTEDFILE}" "${COPYRIGHTFILE}" >/dev/null 2>&1; then
        echo PASSED
    else
        echo FAILED
    fi
}

# Check C and C++ source files
#
# $1 file to check against the C style notice in $C_COPYRIGHT.
# Reports the outcome through PASSED or FAILED; on failure the difference
# between the extracted text and the expected notice is also printed.
# The file name is quoted throughout so that names containing spaces or
# glob characters are passed on unchanged.
#
C_SOURCE()
{
    f=$1
    case `MATCH_COPYRIGHT "$C_COPYRIGHT" "$f"` in
    PASSED)
        PASSED "$f"
        ;;
    FAILED)
        # show the difference
        FAILED "$f"
        $DIFF "${EXTRACTEDFILE}" "${C_COPYRIGHT}"
        ;;
    esac
}


# Check Fortran90 source files
#
# $1 file to check against the Fortran style notice in $FTN_COPYRIGHT.
# Reports the outcome through PASSED or FAILED; on failure the difference
# between the extracted text and the expected notice is also printed.
# The file name is quoted throughout so that names containing spaces or
# glob characters are passed on unchanged.
#
FORTRAN_SOURCE()
{
    f=$1
    case `MATCH_COPYRIGHT "$FTN_COPYRIGHT" "$f"` in
    PASSED)
        PASSED "$f"
        ;;
    FAILED)
        # show the difference
        FAILED "$f"
        $DIFF "${EXTRACTEDFILE}" "${FTN_COPYRIGHT}"
        ;;
    esac
}


# Check HTML Files
#
# $1 file to check against the HTML style notice in $HTM_COPYRIGHT.
# Reports the outcome through PASSED or FAILED; on failure the difference
# between the extracted text and the expected notice is also printed.
# The file name is quoted throughout so that names containing spaces or
# glob characters are passed on unchanged.
#
HTML_FILE()
{
    f=$1
    case `MATCH_COPYRIGHT "$HTM_COPYRIGHT" "$f"` in
    PASSED)
        PASSED "$f"
        ;;
    FAILED)
        # show the difference
        FAILED "$f"
        $DIFF "${EXTRACTEDFILE}" "${HTM_COPYRIGHT}"
        ;;
    esac
}


# Check Shell script files and other files (e.g., Makefile) that use
# the style of copyright notice of leading #'s.
# There is a preferred style (single leading #) vs the alternate style (double
# leading #'s).  The double leading #'s style is used in Makefile.am, for
# example.
#
# $1 file to check.  It passes if either style matches; the alternate style
# is only tried when the preferred one fails.  On failure the difference
# from the preferred style is printed.
#
SHELL_FILE()
{
    f=$1
    sh_verdict=`MATCH_COPYRIGHT "$SH_COPYRIGHT" "$f"`
    if [ "$sh_verdict" = FAILED ]; then
        # Preferred style did not match; fall back to the alternate style.
        sh_verdict=`MATCH_COPYRIGHT "$SH_COPYRIGHT2" "$f"`
    fi
    if [ "$sh_verdict" = FAILED ]; then
        # show the differences with the preferred style.
        FAILED "$f"
        $DIFF "${EXTRACTEDFILE}" "${SH_COPYRIGHT}"
    else
        PASSED "$f"
    fi
}


# Check Windows Batch files
#
# $1 file to check against the batch style notice in $WINBAT_COPYRIGHT.
# Reports the outcome through PASSED or FAILED; on failure the difference
# between the extracted text and the expected notice is also printed.
# The file name is quoted throughout so that names containing spaces or
# glob characters are passed on unchanged.
#
BATCH_FILE()
{
    f=$1
    case `MATCH_COPYRIGHT "$WINBAT_COPYRIGHT" "$f"` in
    PASSED)
        PASSED "$f"
        ;;
    FAILED)
        # show the difference
        FAILED "$f"
        $DIFF "${EXTRACTEDFILE}" "${WINBAT_COPYRIGHT}"
        ;;
    esac
}


# Check Unknown type file.
# First check if there is something that resembles a copyright notice in
# the first "page".  If so, then inspect the first 5 lines to guess what
# type of file it is.  Then try to verify the Copyright notice according to
# the guessed type.
#
# $1 file to check.  Uses $tmpfile to hold the first 5 lines.  A file with
# no recognizable notice or type is handed to UNKNOWN_TYPE.
#
UNKNOWN_FILE()
{
    f=$1
    if head -n "$NUMBEGINLINES" < "$f" | grep "${COPYRIGHTSTR}" > /dev/null; then
        # Now guess the file type and try match it.
        head -n 5 < "$f" > "$tmpfile"
        if head -n 1 < "$tmpfile" | grep '^#!' > /dev/null; then
            # First line is "#!".  It is likely a shell script or similar type.
            SHELL_FILE "$f"
        elif grep '/\*' < "$tmpfile" > /dev/null; then
            # Found some lines containing '/*'.  It may be a C/C++ style file.
            C_SOURCE "$f"
        elif grep '^!' < "$tmpfile" > /dev/null; then
            # Some lines start with a "!".  It may be a Fortran 9X style file.
            FORTRAN_SOURCE "$f"
        elif grep '^#' < "$tmpfile" > /dev/null; then
            # Some lines start with a "#".  It may be a shell like type.
            # Put this after C_SOURCE which may have #define and such lines.
            SHELL_FILE "$f"
        elif grep -i '^<html>' < "$tmpfile" > /dev/null || \
             grep '^<!--' < "$tmpfile" > /dev/null ; then
            # Some lines start with a "<html>" or having an html comment tag.
            # It may be an HTML file.
            HTML_FILE "$f"
        else
            # Unknown type.
            UNKNOWN_TYPE "$f"
        fi
    else
        # Unknown type.
        UNKNOWN_TYPE "$f"
    fi
}


# Passed checking.
# $1 file that has passed.
#
# Prints "    PASSED" in verbose mode and appends the file name, exactly as
# given, as one line to $PASSEDLOG.
#
PASSED()
{
    if test "X-$VERBOSE" = X-yes; then
        echo "    PASSED"
    fi
    # Quoted so that the name is not word-split or glob-expanded.
    printf '%s\n' "$1" >> "$PASSEDLOG"
}


# Unknown file type. Considered a fail.
# $1 name of unknown file.
#
# The report line goes both to stdout and, appended, to $FAILEDLOG.
#
UNKNOWN_TYPE()
{
    unknown_report="UNKNOWN type: $1"
    echo "$unknown_report" | tee -a "$FAILEDLOG"
}


# Skip checking.
# $1 file that is skipped.
#
# Prints "    SKIPPED" in verbose mode and appends the file name, exactly as
# given, as one line to $SKIPPEDLOG.
#
SKIP()
{
    if test "X-$VERBOSE" = X-yes; then
        echo "    SKIPPED"
    fi
    # Quoted so that the name is not word-split or glob-expanded.
    printf '%s\n' "$1" >> "$SKIPPEDLOG"
}


# Failed checking.
# $1 file that has failed.
#
# Prints "FAILED:  <file>" on stdout and appends the file name, exactly as
# given, as one line to $FAILEDLOG.
#
FAILED()
{
    echo "FAILED:  $1"
    # Quoted so that the name is not word-split or glob-expanded.
    printf '%s\n' "$1" >> "$FAILEDLOG"
}


#
# Main body
#
# Parse the command line, initialize, check every candidate file, then
# report the totals.  Exit status is 1 if any file failed, 0 otherwise.

PARSE_OPTION "$@"
INITIALIZATION

# Use find to list all those file names and process them one by one.
# $DIRS and $EXCEPTIONDIRS are deliberately left unquoted because each holds
# several words.  Globbing is switched off, inside a subshell only, so that
# patterns such as "prune *.bak" reach find intact instead of being expanded
# by the shell against the current directory.
# NOTE: when -fname is given, the prune/prunepath settings are not applied.
(
    set -f
    if test -z "$FNAME" ; then
        find $DIRS $EXCEPTIONDIRS -type f -print
    else
        find $DIRS -type f -name "${FNAME}" -print
    fi
) |
while IFS= read -r file; do
    # IFS= and -r keep leading blanks and backslashes in file names.
    if test "X-$VERBOSE" = X-yes; then
        echo "checking ${file}..."
    fi
    # $EXCEPTIONS holds one extended regular expression per line.
    if printf '%s\n' "$file" | grep -E -f "$EXCEPTIONS" > /dev/null; then
        SKIP "${file}"
    else
        case "${file}" in
        *.c | *.h | *.cpp )
            C_SOURCE "${file}"
            ;;
        *.f90 )
            FORTRAN_SOURCE "${file}"
            ;;
        *.htm | *.html )
            HTML_FILE "${file}"
            ;;
        *.sh | *.sh.in | *Makefile | *Makefile.in | *Makefile.am )
            SHELL_FILE "${file}"
            ;;
        *.bat | *.BAT )
            # Windows Batch files
            BATCH_FILE "${file}"
            ;;
        *.h5 | *.hdf5 )
            # Ignore HDF5 data files
            continue
            ;;
        *.jpg | *.obj | *.gif | *.png | *.pdf | \
        *.JPG | *.OBJ | *.GIF | *.PNG | *.PDF )
            # Ignore binary data files
            continue
            ;;
        *.zip | *.dsp | *.dsw | *.js | *.sln )
            # Ignore Windows binary or special files.
            # .dsp & .dsw are Visual Studio project files.
            # .sln are .NET solution files.
            # .js are Microsoft Java Script files.
            continue
            ;;
        *CVS/* )
            # Ignore CVS control files.
            continue
            ;;
        *)
            UNKNOWN_FILE "$file"
            ;;
        esac
    fi
done

# check results
# The loop above ran in a subshell, so the totals are taken from the log
# files rather than from shell variables.  Blanks that some wc
# implementations print around the number are stripped.
if [ -f "$PASSEDLOG" ]; then
    NPASSEDFILES=`wc -l < "$PASSEDLOG" | tr -d ' '`
fi
if [ -f "$FAILEDLOG" ]; then
    NFAILEDFILES=`wc -l < "$FAILEDLOG" | tr -d ' '`
fi

# Cleanup
rm -f "$C_COPYRIGHT" "$FTN_COPYRIGHT" "$HTM_COPYRIGHT" "$SH_COPYRIGHT" \
    "$SH_COPYRIGHT2" "$WINBAT_COPYRIGHT"
rm -f "$EXCEPTIONS" "$EXTRACTEDFILE" "$tmpfile"
rm -f "$PASSEDLOG" "$SKIPPEDLOG" "$FAILEDLOG"

# Report Results
# Results are not totally accurate--e.g., skipped and ignored files are not
# counted, thus not reported.
#
echo "*******************"
echo " REPORT"
echo "*******************"
echo Number of passed files = $NPASSEDFILES
echo Number of failed files = $NFAILEDFILES

if [ "$NFAILEDFILES" -gt 0 ]; then
    exitcode=1
else
    exitcode=0
fi
exit  $exitcode