diff options
-rw-r--r-- | Doc/lib/libcompilerlike.tex | 87 | ||||
-rw-r--r-- | Lib/compilerlike.py | 154 |
2 files changed, 241 insertions, 0 deletions
diff --git a/Doc/lib/libcompilerlike.tex b/Doc/lib/libcompilerlike.tex new file mode 100644 index 0000000..3e9daff --- /dev/null +++ b/Doc/lib/libcompilerlike.tex @@ -0,0 +1,87 @@ +\section{\module{compilerlike} --- + framework code for building compiler-like programs.} + +\declaremodule{standard}{compilerlike} +\modulesynopsis{Framework code for building compiler-like programs.} +\moduleauthor{Eric S. Raymond}{esr@thyrsus.com} +\sectionauthor{Eric S. Raymond}{esr@thyrsus.com} + +There is a common `compiler-like' pattern in Unix scripts which is useful +for translation utilities of all sorts. A program following this pattern +behaves as a filter when no argument files are specified on the command +line, but otherwise transforms each file individually into a corresponding +output file. + +The \function{filefilter}, \function{linefilter}, and +\function{sponge} functions in this module provide a framework and +glue code to make such programs easy to write. You supply a function +to massage the file data; depending on which entry point you use, it +can take input and output file pointers, or it can take a string +consisting of the entire file's data and return a replacement, or it +can take in succession strings consisting of each of the file's lines +and return a translated line for each. + +All three of these entry points take a name, an argument list of files, +a data transformation function, and a name transformation function. +They differ only in the arguments they pass to the transformation +function when it is called. + +The name argument is not used by the functions in this module; it is +simply passed as the first argument to the transformation function. +Typically it is a string that names the filter and is used in +generating error messages, but it could be arbitrary data. + +The second argument is interpreted as a list of filenames. The files +are transformed in left to right order in the list. 
A filename +consisting of a dash is interpreted as a directive to read from +standard input (this can be useful in pipelines). + +The third argument is the data transformation function. +Interpretation of this argument varies across the three +entry points and is described below. + +The fourth, optional argument is a name transformation function or +name suffix string. If it is of string type, the shortest suffix of each +filename beginning with the first character of the argument string +is stripped off. If the first character of the argument does not +occur in the filename, no suffix is removed. Then the name suffix +argument is concatenated to the end of the stripped filename. (Thus, +a name suffix argument of ".x" will cause the filenames foo.c and +bar.d to be transformed to foo.x and bar.x respectively.) + +If the fourth argument is specified and is a function, the name of the +input file is passed to it and the return value of the function +becomes the name of the output file. If this argument is not +specified, the input file is replaced with the transformed version. + +Replacement of each file is atomic and doesn't occur until the +translation of that file has completed. Any tempfiles are removed +automatically on any exception thrown by the translation function, +and the exception is then passed upwards. + +\begin{funcdesc}{filefilter}{name, arguments, trans_data\optional{,trans_file}} +Filter using a function taking the name and two file-object +arguments. The function is expected to read data from the input file +object, transform it, and write the data to the output file object. +When the function terminates, the translation is done. The return +value of the transformation function is not used. +\end{funcdesc} + +\begin{funcdesc}{linefilter}{name,arguments,trans_data\optional{,trans_file}} +Filter using a function taking the name and a string argument. The return +value of the function should be a string. 
This function is applied to +each line in the input file in turn; the return values become the +lines of the transformed file. +\end{funcdesc} + +\begin{funcdesc}{sponge}{name, arguments, trans_data\optional{, trans_file}} +Filter using a function taking the name and a string argument. The +return value of the function should be a string. The function will be +passed the entire contents of the input file as a string. The string +return value of the function will become the entire contents of the +transformed file. +\end{funcdesc} + +% End + + diff --git a/Lib/compilerlike.py b/Lib/compilerlike.py new file mode 100644 index 0000000..618f6bb --- /dev/null +++ b/Lib/compilerlike.py @@ -0,0 +1,154 @@ +""" +compilerlike -- framework code for building compiler-like programs. + +There is a common `compiler-like' pattern in Unix scripts which is useful +for translation utilities of all sorts. A program following this pattern +behaves as a filter when no argument files are specified on the command +line, but otherwise transforms each file individually into a corresponding +output file. + +This module provides framework and glue code to make such programs easy +to write. You supply a function to massage the file data; depending +on which entry point you use, it can take input and output file pointers, +or it can take a string consisting of the entire file's data and return +a replacement, or it can take in succession strings consisting of each +of the file's lines and return a translated line for each. + +Argument files are transformed in left to right order in the argument list. +A filename consisting of a dash is interpreted as a directive to read from +standard input (this can be useful in pipelines). + +Replacement of each file is atomic and doesn't occur until the +translation of that file has completed. Any tempfiles are removed +automatically on any exception thrown by the translation function, +and the exception is then passed upwards. +""" + +# Requires Python 2. 
from __future__ import nested_scopes

import sys, os, filecmp, traceback


def _tempname(name, file):
    "Return the scratch-file name used while `file` is being translated."
    # Lives next to the input so the final os.rename stays on one filesystem.
    return file + ".~%s-%d~" % (name, os.getpid())


def _output_name(file, trans_filename):
    """Return the name under which the translation of `file` is installed.

    A false `trans_filename` means "replace the input in place".  A string
    is a suffix: the shortest tail of the file's base name starting with the
    suffix's first character is stripped, then the suffix is appended
    (foo.c with ".x" gives foo.x).  Anything else is called with the input
    filename and must return the output filename.
    """
    if not trans_filename:
        return file
    if isinstance(trans_filename, str):
        # Search only the base name: a marker character occurring in a
        # directory component ("./foo", "v1.0/README") must not truncate
        # the path.
        head, tail = os.path.split(file)
        cut = tail.rfind(trans_filename[0])
        if cut > -1:
            tail = tail[:cut]
        return os.path.join(head, tail + trans_filename)
    return trans_filename(file)


def _install(tempfile, file, unchanged, trans_filename):
    "Discard `tempfile` if nothing changed, else rename it into place."
    if unchanged:
        # An identity translation produces no output file at all, even
        # when a name transformation was requested.
        os.remove(tempfile)
    else:
        os.rename(tempfile, _output_name(file, trans_filename))


def filefilter(name, arguments, trans_data, trans_filename=None):
    """Filter stdin to stdout, or file arguments to renamed files.

    name           -- filter name; used here only to build tempfile names.
    arguments      -- list of filenames; empty means filter stdin to
                      stdout, and a "-" entry does the same for that entry.
    trans_data     -- called as trans_data(filename, infp, outfp); for
                      standard input the filename passed is "stdin".  Its
                      return value is ignored.
    trans_filename -- None (replace input in place), a suffix string, or a
                      function mapping input filename to output filename.

    If trans_data raises, the tempfile is removed and the exception is
    passed upwards unchanged.
    """
    if not arguments:
        trans_data("stdin", sys.stdin, sys.stdout)
        return
    for file in arguments:
        if file == '-':     # - is conventional for stdin
            # There is no file to replace, so stream straight to stdout
            # rather than creating (and comparing against) a bogus "stdin".
            trans_data("stdin", sys.stdin, sys.stdout)
            continue
        tempfile = _tempname(name, file)
        infp = open(file)
        try:
            outfp = open(tempfile, "w")
            try:
                try:
                    trans_data(file, infp, outfp)
                finally:
                    # Flush before the result is compared or renamed.
                    outfp.close()
            except:
                # Deliberately catches everything: clean up, then pass
                # the original exception (and traceback) upwards.
                os.remove(tempfile)
                raise
        finally:
            infp.close()
        # shallow=0 forces a content comparison; the default may declare
        # two files equal from size and mtime alone.
        _install(tempfile, file, filecmp.cmp(file, tempfile, 0),
                 trans_filename)


def line_by_line(name, infp, outfp, translate_line):
    """Hook to do line-by-line translation for filters.

    Each line read from infp is passed to translate_line(name, line) and
    the result is written to outfp.  A None result drops the line.
    """
    while 1:
        line = infp.readline()
        if line == "":
            break
        translated = translate_line(name, line)
        if translated is not None:      # None returns are skipped
            outfp.write(translated)


def linefilter(name, arguments, trans_data, trans_filename=None):
    """Filter framework for line-by-line transformation.

    Same arguments as filefilter, except that trans_data is called as
    trans_data(filename, line) and returns the replacement line (or None
    to drop the line).
    """
    return filefilter(name,
                      arguments,
                      lambda name, infp, outfp: line_by_line(name, infp, outfp, trans_data),
                      trans_filename)


def sponge(name, arguments, trans_data, trans_filename=None):
    """Read input sources entire and transform them in memory.

    trans_data is called as trans_data(name, text) and must return the
    replacement text.  With no arguments, or for a "-" argument, standard
    input is transformed to standard output.  The other arguments are as
    for filefilter.

    Returns 1 (after a message on stderr) if a tempfile cannot be created,
    otherwise None.  Exceptions from trans_data are passed upwards after
    the tempfile has been removed.
    """
    if not arguments:
        sys.stdout.write(trans_data(name, sys.stdin.read()))
        return
    for file in arguments:
        if file == '-':     # - is conventional for stdin
            sys.stdout.write(trans_data(name, sys.stdin.read()))
            continue
        infp = open(file)
        try:
            indoc = infp.read()
        finally:
            infp.close()
        tempfile = _tempname(name, file)
        try:
            outfp = open(tempfile, "w")
        except (IOError, OSError):
            # open() raises IOError on Python 2 and OSError on Python 3.
            sys.stderr.write("%s: can't open tempfile" % name)
            return 1
        try:
            try:
                outdoc = trans_data(name, indoc)
                unchanged = (outdoc == indoc)
                if not unchanged:
                    outfp.write(outdoc)
            finally:
                # Flush before the tempfile is renamed or removed.
                outfp.close()
        except:
            # Deliberately catches everything: clean up, then pass the
            # original exception (and traceback) upwards.
            os.remove(tempfile)
            raise
        _install(tempfile, file, unchanged, trans_filename)


if __name__ == '__main__':
    import getopt

    def nametrans(name):
        return name + ".out"

    def filefilter_test(name, infp, outfp):
        "Test hook for filefilter entry point -- put dashes before blank lines."
        while 1:
            line = infp.readline()
            if not line:
                break
            if line == "\n":
                outfp.write("------------------------------------------\n")
            outfp.write(line)

    def linefilter_test(name, data):
        "Test hook for linefilter entry point -- wrap lines in brackets."
        # Only strip a newline that is really there; the last line of a
        # file may lack one.
        if data[-1:] == "\n":
            data = data[:-1]
        return "<" + data + ">\n"

    def sponge_test(name, data):
        "Test hook for the sponge entry point -- reverse file lines."
        lines = data.split("\n")
        lines.reverse()
        return "\n".join(lines)

    (options, arguments) = getopt.getopt(sys.argv[1:], "fls")
    for (switch, val) in options:
        if switch == '-f':
            filefilter("filefilter_test", arguments, filefilter_test, nametrans)
        elif switch == '-l':
            linefilter("linefilter_test", arguments, linefilter_test, nametrans)
        elif switch == '-s':
            sponge("sponge_test", arguments, sponge_test, ".foo")
        else:
            sys.stdout.write("Unknown option.\n")

# End