From 002905f29fc910c7d9756ae70edeffb96a21585a Mon Sep 17 00:00:00 2001 From: Krystian Kuzniarek Date: Thu, 8 Aug 2019 20:39:33 +0200 Subject: move the pumping script to googlemock --- CONTRIBUTING.md | 4 +- googlemock/docs/pump_manual.md | 190 +++++++++ googlemock/scripts/pump.py | 855 +++++++++++++++++++++++++++++++++++++++++ googletest/docs/pump_manual.md | 190 --------- googletest/scripts/pump.py | 855 ----------------------------------------- 5 files changed, 1047 insertions(+), 1047 deletions(-) create mode 100644 googlemock/docs/pump_manual.md create mode 100755 googlemock/scripts/pump.py delete mode 100644 googletest/docs/pump_manual.md delete mode 100755 googletest/scripts/pump.py diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index b9c14e9..4c18499 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -138,5 +138,5 @@ sense) using a script. For example, the file You don't need to worry about regenerating the source files unless you need to modify them. You would then modify the corresponding `.pump` files and run the -'[pump.py](googletest/scripts/pump.py)' generator script. See the -[Pump Manual](googletest/docs/pump_manual.md). +'[pump.py](googlemock/scripts/pump.py)' generator script. See the +[Pump Manual](googlemock/docs/pump_manual.md). diff --git a/googlemock/docs/pump_manual.md b/googlemock/docs/pump_manual.md new file mode 100644 index 0000000..10b3c5f --- /dev/null +++ b/googlemock/docs/pump_manual.md @@ -0,0 +1,190 @@ +Pump is Useful for Meta Programming. + +# The Problem + +Template and macro libraries often need to define many classes, functions, or +macros that vary only (or almost only) in the number of arguments they take. +It's a lot of repetitive, mechanical, and error-prone work. + +Variadic templates and variadic macros can alleviate the problem. However, while +both are being considered by the C++ committee, neither is in the standard yet +or widely supported by compilers. Thus they are often not a good choice, +especially when your code needs to be portable. And their capabilities are still +limited. + +As a result, authors of such libraries often have to write scripts to generate +their implementation. However, our experience is that it's tedious to write such +scripts, which tend to reflect the structure of the generated code poorly and +are often hard to read and edit. For example, a small change needed in the +generated code may require some non-intuitive, non-trivial changes in the +script. This is especially painful when experimenting with the code. + +# Our Solution + +Pump (for Pump is Useful for Meta Programming, Pretty Useful for Meta +Programming, or Practical Utility for Meta Programming, whichever you prefer) is +a simple meta-programming tool for C++. The idea is that a programmer writes a +`foo.pump` file which contains C++ code plus meta code that manipulates the C++ +code. The meta code can handle iterations over a range, nested iterations, local +meta variable definitions, simple arithmetic, and conditional expressions. You +can view it as a small Domain-Specific Language. The meta language is designed +to be non-intrusive (s.t. it won't confuse Emacs' C++ mode, for example) and +concise, making Pump code intuitive and easy to maintain. + +## Highlights + +* The implementation is in a single Python script and thus ultra portable: no + build or installation is needed and it works cross platforms. +* Pump tries to be smart with respect to + [Google's style guide](https://github.com/google/styleguide): it breaks long + lines (easy to have when they are generated) at acceptable places to fit + within 80 columns and indent the continuation lines correctly. +* The format is human-readable and more concise than XML. +* The format works relatively well with Emacs' C++ mode. + +## Examples + +The following Pump code (where meta keywords start with `$`, `[[` and `]]` are +meta brackets, and `$$` starts a meta comment that ends with the line): + +``` +$var n = 3 $$ Defines a meta variable n. +$range i 0..n $$ Declares the range of meta iterator i (inclusive). +$for i [[ + $$ Meta loop. +// Foo$i does blah for $i-ary predicates. +$range j 1..i +template +class Foo$i { +$if i == 0 [[ + blah a; +]] $elif i <= 2 [[ + blah b; +]] $else [[ + blah c; +]] +}; + +]] +``` + +will be translated by the Pump compiler to: + +```cpp +// Foo0 does blah for 0-ary predicates. +template +class Foo0 { + blah a; +}; + +// Foo1 does blah for 1-ary predicates. +template +class Foo1 { + blah b; +}; + +// Foo2 does blah for 2-ary predicates. +template +class Foo2 { + blah b; +}; + +// Foo3 does blah for 3-ary predicates. +template +class Foo3 { + blah c; +}; +``` + +In another example, + +``` +$range i 1..n +Func($for i + [[a$i]]); +$$ The text between i and [[ is the separator between iterations. +``` + +will generate one of the following lines (without the comments), depending on +the value of `n`: + +```cpp +Func(); // If n is 0. +Func(a1); // If n is 1. +Func(a1 + a2); // If n is 2. +Func(a1 + a2 + a3); // If n is 3. +// And so on... +``` + +## Constructs + +We support the following meta programming constructs: + +| `$var id = exp` | Defines a named constant value. `$id` is | +: : valid util the end of the current meta : +: : lexical block. : +| :------------------------------- | :--------------------------------------- | +| `$range id exp..exp` | Sets the range of an iteration variable, | +: : which can be reused in multiple loops : +: : later. : +| `$for id sep [[ code ]]` | Iteration. The range of `id` must have | +: : been defined earlier. `$id` is valid in : +: : `code`. : +| `$($)` | Generates a single `$` character. | +| `$id` | Value of the named constant or iteration | +: : variable. : +| `$(exp)` | Value of the expression. | +| `$if exp [[ code ]] else_branch` | Conditional. | +| `[[ code ]]` | Meta lexical block. | +| `cpp_code` | Raw C++ code. | +| `$$ comment` | Meta comment. | + +**Note:** To give the user some freedom in formatting the Pump source code, Pump +ignores a new-line character if it's right after `$for foo` or next to `[[` or +`]]`. Without this rule you'll often be forced to write very long lines to get +the desired output. Therefore sometimes you may need to insert an extra new-line +in such places for a new-line to show up in your output. + +## Grammar + +```ebnf +code ::= atomic_code* +atomic_code ::= $var id = exp + | $var id = [[ code ]] + | $range id exp..exp + | $for id sep [[ code ]] + | $($) + | $id + | $(exp) + | $if exp [[ code ]] else_branch + | [[ code ]] + | cpp_code +sep ::= cpp_code | empty_string +else_branch ::= $else [[ code ]] + | $elif exp [[ code ]] else_branch + | empty_string +exp ::= simple_expression_in_Python_syntax +``` + +## Code + +You can find the source code of Pump in [scripts/pump.py](../scripts/pump.py). +It is still very unpolished and lacks automated tests, although it has been +successfully used many times. If you find a chance to use it in your project, +please let us know what you think! We also welcome help on improving Pump. + +## Real Examples + +You can find real-world applications of Pump in +[Google Test](https://github.com/google/googletest/tree/master/googletest) and +[Google Mock](https://github.com/google/googletest/tree/master/googlemock). The +source file `foo.h.pump` generates `foo.h`. + +## Tips + +* If a meta variable is followed by a letter or digit, you can separate them + using `[[]]`, which inserts an empty string. For example `Foo$j[[]]Helper` + generate `Foo1Helper` when `j` is 1. +* To avoid extra-long Pump source lines, you can break a line anywhere you + want by inserting `[[]]` followed by a new line. Since any new-line + character next to `[[` or `]]` is ignored, the generated code won't contain + this new line. diff --git a/googlemock/scripts/pump.py b/googlemock/scripts/pump.py new file mode 100755 index 0000000..66e3217 --- /dev/null +++ b/googlemock/scripts/pump.py @@ -0,0 +1,855 @@ +#!/usr/bin/env python2.7 +# +# Copyright 2008, Google Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""pump v0.2.0 - Pretty Useful for Meta Programming. + +A tool for preprocessor meta programming. Useful for generating +repetitive boilerplate code. Especially useful for writing C++ +classes, functions, macros, and templates that need to work with +various number of arguments. + +USAGE: + pump.py SOURCE_FILE + +EXAMPLES: + pump.py foo.cc.pump + Converts foo.cc.pump to foo.cc. + +GRAMMAR: + CODE ::= ATOMIC_CODE* + ATOMIC_CODE ::= $var ID = EXPRESSION + | $var ID = [[ CODE ]] + | $range ID EXPRESSION..EXPRESSION + | $for ID SEPARATOR [[ CODE ]] + | $($) + | $ID + | $(EXPRESSION) + | $if EXPRESSION [[ CODE ]] ELSE_BRANCH + | [[ CODE ]] + | RAW_CODE + SEPARATOR ::= RAW_CODE | EMPTY + ELSE_BRANCH ::= $else [[ CODE ]] + | $elif EXPRESSION [[ CODE ]] ELSE_BRANCH + | EMPTY + EXPRESSION has Python syntax. +""" + +from __future__ import print_function + +import os +import re +import sys + + +TOKEN_TABLE = [ + (re.compile(r'\$var\s+'), '$var'), + (re.compile(r'\$elif\s+'), '$elif'), + (re.compile(r'\$else\s+'), '$else'), + (re.compile(r'\$for\s+'), '$for'), + (re.compile(r'\$if\s+'), '$if'), + (re.compile(r'\$range\s+'), '$range'), + (re.compile(r'\$[_A-Za-z]\w*'), '$id'), + (re.compile(r'\$\(\$\)'), '$($)'), + (re.compile(r'\$'), '$'), + (re.compile(r'\[\[\n?'), '[['), + (re.compile(r'\]\]\n?'), ']]'), + ] + + +class Cursor: + """Represents a position (line and column) in a text file.""" + + def __init__(self, line=-1, column=-1): + self.line = line + self.column = column + + def __eq__(self, rhs): + return self.line == rhs.line and self.column == rhs.column + + def __ne__(self, rhs): + return not self == rhs + + def __lt__(self, rhs): + return self.line < rhs.line or ( + self.line == rhs.line and self.column < rhs.column) + + def __le__(self, rhs): + return self < rhs or self == rhs + + def __gt__(self, rhs): + return rhs < self + + def __ge__(self, rhs): + return rhs <= self + + def __str__(self): + if self == Eof(): + return 'EOF' + else: + return '%s(%s)' % (self.line + 1, self.column) + + def __add__(self, offset): + return Cursor(self.line, self.column + offset) + + def __sub__(self, offset): + return Cursor(self.line, self.column - offset) + + def Clone(self): + """Returns a copy of self.""" + + return Cursor(self.line, self.column) + + +# Special cursor to indicate the end-of-file. +def Eof(): + """Returns the special cursor to denote the end-of-file.""" + return Cursor(-1, -1) + + +class Token: + """Represents a token in a Pump source file.""" + + def __init__(self, start=None, end=None, value=None, token_type=None): + if start is None: + self.start = Eof() + else: + self.start = start + if end is None: + self.end = Eof() + else: + self.end = end + self.value = value + self.token_type = token_type + + def __str__(self): + return 'Token @%s: \'%s\' type=%s' % ( + self.start, self.value, self.token_type) + + def Clone(self): + """Returns a copy of self.""" + + return Token(self.start.Clone(), self.end.Clone(), self.value, + self.token_type) + + +def StartsWith(lines, pos, string): + """Returns True iff the given position in lines starts with 'string'.""" + + return lines[pos.line][pos.column:].startswith(string) + + +def FindFirstInLine(line, token_table): + best_match_start = -1 + for (regex, token_type) in token_table: + m = regex.search(line) + if m: + # We found regex in lines + if best_match_start < 0 or m.start() < best_match_start: + best_match_start = m.start() + best_match_length = m.end() - m.start() + best_match_token_type = token_type + + if best_match_start < 0: + return None + + return (best_match_start, best_match_length, best_match_token_type) + + +def FindFirst(lines, token_table, cursor): + """Finds the first occurrence of any string in strings in lines.""" + + start = cursor.Clone() + cur_line_number = cursor.line + for line in lines[start.line:]: + if cur_line_number == start.line: + line = line[start.column:] + m = FindFirstInLine(line, token_table) + if m: + # We found a regex in line. + (start_column, length, token_type) = m + if cur_line_number == start.line: + start_column += start.column + found_start = Cursor(cur_line_number, start_column) + found_end = found_start + length + return MakeToken(lines, found_start, found_end, token_type) + cur_line_number += 1 + # We failed to find str in lines + return None + + +def SubString(lines, start, end): + """Returns a substring in lines.""" + + if end == Eof(): + end = Cursor(len(lines) - 1, len(lines[-1])) + + if start >= end: + return '' + + if start.line == end.line: + return lines[start.line][start.column:end.column] + + result_lines = ([lines[start.line][start.column:]] + + lines[start.line + 1:end.line] + + [lines[end.line][:end.column]]) + return ''.join(result_lines) + + +def StripMetaComments(str): + """Strip meta comments from each line in the given string.""" + + # First, completely remove lines containing nothing but a meta + # comment, including the trailing \n. + str = re.sub(r'^\s*\$\$.*\n', '', str) + + # Then, remove meta comments from contentful lines. + return re.sub(r'\s*\$\$.*', '', str) + + +def MakeToken(lines, start, end, token_type): + """Creates a new instance of Token.""" + + return Token(start, end, SubString(lines, start, end), token_type) + + +def ParseToken(lines, pos, regex, token_type): + line = lines[pos.line][pos.column:] + m = regex.search(line) + if m and not m.start(): + return MakeToken(lines, pos, pos + m.end(), token_type) + else: + print('ERROR: %s expected at %s.' % (token_type, pos)) + sys.exit(1) + + +ID_REGEX = re.compile(r'[_A-Za-z]\w*') +EQ_REGEX = re.compile(r'=') +REST_OF_LINE_REGEX = re.compile(r'.*?(?=$|\$\$)') +OPTIONAL_WHITE_SPACES_REGEX = re.compile(r'\s*') +WHITE_SPACE_REGEX = re.compile(r'\s') +DOT_DOT_REGEX = re.compile(r'\.\.') + + +def Skip(lines, pos, regex): + line = lines[pos.line][pos.column:] + m = re.search(regex, line) + if m and not m.start(): + return pos + m.end() + else: + return pos + + +def SkipUntil(lines, pos, regex, token_type): + line = lines[pos.line][pos.column:] + m = re.search(regex, line) + if m: + return pos + m.start() + else: + print ('ERROR: %s expected on line %s after column %s.' % + (token_type, pos.line + 1, pos.column)) + sys.exit(1) + + +def ParseExpTokenInParens(lines, pos): + def ParseInParens(pos): + pos = Skip(lines, pos, OPTIONAL_WHITE_SPACES_REGEX) + pos = Skip(lines, pos, r'\(') + pos = Parse(pos) + pos = Skip(lines, pos, r'\)') + return pos + + def Parse(pos): + pos = SkipUntil(lines, pos, r'\(|\)', ')') + if SubString(lines, pos, pos + 1) == '(': + pos = Parse(pos + 1) + pos = Skip(lines, pos, r'\)') + return Parse(pos) + else: + return pos + + start = pos.Clone() + pos = ParseInParens(pos) + return MakeToken(lines, start, pos, 'exp') + + +def RStripNewLineFromToken(token): + if token.value.endswith('\n'): + return Token(token.start, token.end, token.value[:-1], token.token_type) + else: + return token + + +def TokenizeLines(lines, pos): + while True: + found = FindFirst(lines, TOKEN_TABLE, pos) + if not found: + yield MakeToken(lines, pos, Eof(), 'code') + return + + if found.start == pos: + prev_token = None + prev_token_rstripped = None + else: + prev_token = MakeToken(lines, pos, found.start, 'code') + prev_token_rstripped = RStripNewLineFromToken(prev_token) + + if found.token_type == '$var': + if prev_token_rstripped: + yield prev_token_rstripped + yield found + id_token = ParseToken(lines, found.end, ID_REGEX, 'id') + yield id_token + pos = Skip(lines, id_token.end, OPTIONAL_WHITE_SPACES_REGEX) + + eq_token = ParseToken(lines, pos, EQ_REGEX, '=') + yield eq_token + pos = Skip(lines, eq_token.end, r'\s*') + + if SubString(lines, pos, pos + 2) != '[[': + exp_token = ParseToken(lines, pos, REST_OF_LINE_REGEX, 'exp') + yield exp_token + pos = Cursor(exp_token.end.line + 1, 0) + elif found.token_type == '$for': + if prev_token_rstripped: + yield prev_token_rstripped + yield found + id_token = ParseToken(lines, found.end, ID_REGEX, 'id') + yield id_token + pos = Skip(lines, id_token.end, WHITE_SPACE_REGEX) + elif found.token_type == '$range': + if prev_token_rstripped: + yield prev_token_rstripped + yield found + id_token = ParseToken(lines, found.end, ID_REGEX, 'id') + yield id_token + pos = Skip(lines, id_token.end, OPTIONAL_WHITE_SPACES_REGEX) + + dots_pos = SkipUntil(lines, pos, DOT_DOT_REGEX, '..') + yield MakeToken(lines, pos, dots_pos, 'exp') + yield MakeToken(lines, dots_pos, dots_pos + 2, '..') + pos = dots_pos + 2 + new_pos = Cursor(pos.line + 1, 0) + yield MakeToken(lines, pos, new_pos, 'exp') + pos = new_pos + elif found.token_type == '$': + if prev_token: + yield prev_token + yield found + exp_token = ParseExpTokenInParens(lines, found.end) + yield exp_token + pos = exp_token.end + elif (found.token_type == ']]' or found.token_type == '$if' or + found.token_type == '$elif' or found.token_type == '$else'): + if prev_token_rstripped: + yield prev_token_rstripped + yield found + pos = found.end + else: + if prev_token: + yield prev_token + yield found + pos = found.end + + +def Tokenize(s): + """A generator that yields the tokens in the given string.""" + if s != '': + lines = s.splitlines(True) + for token in TokenizeLines(lines, Cursor(0, 0)): + yield token + + +class CodeNode: + def __init__(self, atomic_code_list=None): + self.atomic_code = atomic_code_list + + +class VarNode: + def __init__(self, identifier=None, atomic_code=None): + self.identifier = identifier + self.atomic_code = atomic_code + + +class RangeNode: + def __init__(self, identifier=None, exp1=None, exp2=None): + self.identifier = identifier + self.exp1 = exp1 + self.exp2 = exp2 + + +class ForNode: + def __init__(self, identifier=None, sep=None, code=None): + self.identifier = identifier + self.sep = sep + self.code = code + + +class ElseNode: + def __init__(self, else_branch=None): + self.else_branch = else_branch + + +class IfNode: + def __init__(self, exp=None, then_branch=None, else_branch=None): + self.exp = exp + self.then_branch = then_branch + self.else_branch = else_branch + + +class RawCodeNode: + def __init__(self, token=None): + self.raw_code = token + + +class LiteralDollarNode: + def __init__(self, token): + self.token = token + + +class ExpNode: + def __init__(self, token, python_exp): + self.token = token + self.python_exp = python_exp + + +def PopFront(a_list): + head = a_list[0] + a_list[:1] = [] + return head + + +def PushFront(a_list, elem): + a_list[:0] = [elem] + + +def PopToken(a_list, token_type=None): + token = PopFront(a_list) + if token_type is not None and token.token_type != token_type: + print('ERROR: %s expected at %s' % (token_type, token.start)) + print('ERROR: %s found instead' % (token,)) + sys.exit(1) + + return token + + +def PeekToken(a_list): + if not a_list: + return None + + return a_list[0] + + +def ParseExpNode(token): + python_exp = re.sub(r'([_A-Za-z]\w*)', r'self.GetValue("\1")', token.value) + return ExpNode(token, python_exp) + + +def ParseElseNode(tokens): + def Pop(token_type=None): + return PopToken(tokens, token_type) + + next = PeekToken(tokens) + if not next: + return None + if next.token_type == '$else': + Pop('$else') + Pop('[[') + code_node = ParseCodeNode(tokens) + Pop(']]') + return code_node + elif next.token_type == '$elif': + Pop('$elif') + exp = Pop('code') + Pop('[[') + code_node = ParseCodeNode(tokens) + Pop(']]') + inner_else_node = ParseElseNode(tokens) + return CodeNode([IfNode(ParseExpNode(exp), code_node, inner_else_node)]) + elif not next.value.strip(): + Pop('code') + return ParseElseNode(tokens) + else: + return None + + +def ParseAtomicCodeNode(tokens): + def Pop(token_type=None): + return PopToken(tokens, token_type) + + head = PopFront(tokens) + t = head.token_type + if t == 'code': + return RawCodeNode(head) + elif t == '$var': + id_token = Pop('id') + Pop('=') + next = PeekToken(tokens) + if next.token_type == 'exp': + exp_token = Pop() + return VarNode(id_token, ParseExpNode(exp_token)) + Pop('[[') + code_node = ParseCodeNode(tokens) + Pop(']]') + return VarNode(id_token, code_node) + elif t == '$for': + id_token = Pop('id') + next_token = PeekToken(tokens) + if next_token.token_type == 'code': + sep_token = next_token + Pop('code') + else: + sep_token = None + Pop('[[') + code_node = ParseCodeNode(tokens) + Pop(']]') + return ForNode(id_token, sep_token, code_node) + elif t == '$if': + exp_token = Pop('code') + Pop('[[') + code_node = ParseCodeNode(tokens) + Pop(']]') + else_node = ParseElseNode(tokens) + return IfNode(ParseExpNode(exp_token), code_node, else_node) + elif t == '$range': + id_token = Pop('id') + exp1_token = Pop('exp') + Pop('..') + exp2_token = Pop('exp') + return RangeNode(id_token, ParseExpNode(exp1_token), + ParseExpNode(exp2_token)) + elif t == '$id': + return ParseExpNode(Token(head.start + 1, head.end, head.value[1:], 'id')) + elif t == '$($)': + return LiteralDollarNode(head) + elif t == '$': + exp_token = Pop('exp') + return ParseExpNode(exp_token) + elif t == '[[': + code_node = ParseCodeNode(tokens) + Pop(']]') + return code_node + else: + PushFront(tokens, head) + return None + + +def ParseCodeNode(tokens): + atomic_code_list = [] + while True: + if not tokens: + break + atomic_code_node = ParseAtomicCodeNode(tokens) + if atomic_code_node: + atomic_code_list.append(atomic_code_node) + else: + break + return CodeNode(atomic_code_list) + + +def ParseToAST(pump_src_text): + """Convert the given Pump source text into an AST.""" + tokens = list(Tokenize(pump_src_text)) + code_node = ParseCodeNode(tokens) + return code_node + + +class Env: + def __init__(self): + self.variables = [] + self.ranges = [] + + def Clone(self): + clone = Env() + clone.variables = self.variables[:] + clone.ranges = self.ranges[:] + return clone + + def PushVariable(self, var, value): + # If value looks like an int, store it as an int. + try: + int_value = int(value) + if ('%s' % int_value) == value: + value = int_value + except Exception: + pass + self.variables[:0] = [(var, value)] + + def PopVariable(self): + self.variables[:1] = [] + + def PushRange(self, var, lower, upper): + self.ranges[:0] = [(var, lower, upper)] + + def PopRange(self): + self.ranges[:1] = [] + + def GetValue(self, identifier): + for (var, value) in self.variables: + if identifier == var: + return value + + print('ERROR: meta variable %s is undefined.' % (identifier,)) + sys.exit(1) + + def EvalExp(self, exp): + try: + result = eval(exp.python_exp) + except Exception as e: # pylint: disable=broad-except + print('ERROR: caught exception %s: %s' % (e.__class__.__name__, e)) + print('ERROR: failed to evaluate meta expression %s at %s' % + (exp.python_exp, exp.token.start)) + sys.exit(1) + return result + + def GetRange(self, identifier): + for (var, lower, upper) in self.ranges: + if identifier == var: + return (lower, upper) + + print('ERROR: range %s is undefined.' % (identifier,)) + sys.exit(1) + + +class Output: + def __init__(self): + self.string = '' + + def GetLastLine(self): + index = self.string.rfind('\n') + if index < 0: + return '' + + return self.string[index + 1:] + + def Append(self, s): + self.string += s + + +def RunAtomicCode(env, node, output): + if isinstance(node, VarNode): + identifier = node.identifier.value.strip() + result = Output() + RunAtomicCode(env.Clone(), node.atomic_code, result) + value = result.string + env.PushVariable(identifier, value) + elif isinstance(node, RangeNode): + identifier = node.identifier.value.strip() + lower = int(env.EvalExp(node.exp1)) + upper = int(env.EvalExp(node.exp2)) + env.PushRange(identifier, lower, upper) + elif isinstance(node, ForNode): + identifier = node.identifier.value.strip() + if node.sep is None: + sep = '' + else: + sep = node.sep.value + (lower, upper) = env.GetRange(identifier) + for i in range(lower, upper + 1): + new_env = env.Clone() + new_env.PushVariable(identifier, i) + RunCode(new_env, node.code, output) + if i != upper: + output.Append(sep) + elif isinstance(node, RawCodeNode): + output.Append(node.raw_code.value) + elif isinstance(node, IfNode): + cond = env.EvalExp(node.exp) + if cond: + RunCode(env.Clone(), node.then_branch, output) + elif node.else_branch is not None: + RunCode(env.Clone(), node.else_branch, output) + elif isinstance(node, ExpNode): + value = env.EvalExp(node) + output.Append('%s' % (value,)) + elif isinstance(node, LiteralDollarNode): + output.Append('$') + elif isinstance(node, CodeNode): + RunCode(env.Clone(), node, output) + else: + print('BAD') + print(node) + sys.exit(1) + + +def RunCode(env, code_node, output): + for atomic_code in code_node.atomic_code: + RunAtomicCode(env, atomic_code, output) + + +def IsSingleLineComment(cur_line): + return '//' in cur_line + + +def IsInPreprocessorDirective(prev_lines, cur_line): + if cur_line.lstrip().startswith('#'): + return True + return prev_lines and prev_lines[-1].endswith('\\') + + +def WrapComment(line, output): + loc = line.find('//') + before_comment = line[:loc].rstrip() + if before_comment == '': + indent = loc + else: + output.append(before_comment) + indent = len(before_comment) - len(before_comment.lstrip()) + prefix = indent*' ' + '// ' + max_len = 80 - len(prefix) + comment = line[loc + 2:].strip() + segs = [seg for seg in re.split(r'(\w+\W*)', comment) if seg != ''] + cur_line = '' + for seg in segs: + if len((cur_line + seg).rstrip()) < max_len: + cur_line += seg + else: + if cur_line.strip() != '': + output.append(prefix + cur_line.rstrip()) + cur_line = seg.lstrip() + if cur_line.strip() != '': + output.append(prefix + cur_line.strip()) + + +def WrapCode(line, line_concat, output): + indent = len(line) - len(line.lstrip()) + prefix = indent*' ' # Prefix of the current line + max_len = 80 - indent - len(line_concat) # Maximum length of the current line + new_prefix = prefix + 4*' ' # Prefix of a continuation line + new_max_len = max_len - 4 # Maximum length of a continuation line + # Prefers to wrap a line after a ',' or ';'. + segs = [seg for seg in re.split(r'([^,;]+[,;]?)', line.strip()) if seg != ''] + cur_line = '' # The current line without leading spaces. + for seg in segs: + # If the line is still too long, wrap at a space. + while cur_line == '' and len(seg.strip()) > max_len: + seg = seg.lstrip() + split_at = seg.rfind(' ', 0, max_len) + output.append(prefix + seg[:split_at].strip() + line_concat) + seg = seg[split_at + 1:] + prefix = new_prefix + max_len = new_max_len + + if len((cur_line + seg).rstrip()) < max_len: + cur_line = (cur_line + seg).lstrip() + else: + output.append(prefix + cur_line.rstrip() + line_concat) + prefix = new_prefix + max_len = new_max_len + cur_line = seg.lstrip() + if cur_line.strip() != '': + output.append(prefix + cur_line.strip()) + + +def WrapPreprocessorDirective(line, output): + WrapCode(line, ' \\', output) + + +def WrapPlainCode(line, output): + WrapCode(line, '', output) + + +def IsMultiLineIWYUPragma(line): + return re.search(r'/\* IWYU pragma: ', line) + + +def IsHeaderGuardIncludeOrOneLineIWYUPragma(line): + return (re.match(r'^#(ifndef|define|endif\s*//)\s*[\w_]+\s*$', line) or + re.match(r'^#include\s', line) or + # Don't break IWYU pragmas, either; that causes iwyu.py problems. + re.search(r'// IWYU pragma: ', line)) + + +def WrapLongLine(line, output): + line = line.rstrip() + if len(line) <= 80: + output.append(line) + elif IsSingleLineComment(line): + if IsHeaderGuardIncludeOrOneLineIWYUPragma(line): + # The style guide made an exception to allow long header guard lines, + # includes and IWYU pragmas. + output.append(line) + else: + WrapComment(line, output) + elif IsInPreprocessorDirective(output, line): + if IsHeaderGuardIncludeOrOneLineIWYUPragma(line): + # The style guide made an exception to allow long header guard lines, + # includes and IWYU pragmas. + output.append(line) + else: + WrapPreprocessorDirective(line, output) + elif IsMultiLineIWYUPragma(line): + output.append(line) + else: + WrapPlainCode(line, output) + + +def BeautifyCode(string): + lines = string.splitlines() + output = [] + for line in lines: + WrapLongLine(line, output) + output2 = [line.rstrip() for line in output] + return '\n'.join(output2) + '\n' + + +def ConvertFromPumpSource(src_text): + """Return the text generated from the given Pump source text.""" + ast = ParseToAST(StripMetaComments(src_text)) + output = Output() + RunCode(Env(), ast, output) + return BeautifyCode(output.string) + + +def main(argv): + if len(argv) == 1: + print(__doc__) + sys.exit(1) + + file_path = argv[-1] + output_str = ConvertFromPumpSource(file(file_path, 'r').read()) + if file_path.endswith('.pump'): + output_file_path = file_path[:-5] + else: + output_file_path = '-' + if output_file_path == '-': + print(output_str,) + else: + output_file = file(output_file_path, 'w') + output_file.write('// This file was GENERATED by command:\n') + output_file.write('// %s %s\n' % + (os.path.basename(__file__), os.path.basename(file_path))) + output_file.write('// DO NOT EDIT BY HAND!!!\n\n') + output_file.write(output_str) + output_file.close() + + +if __name__ == '__main__': + main(sys.argv) diff --git a/googletest/docs/pump_manual.md b/googletest/docs/pump_manual.md deleted file mode 100644 index 10b3c5f..0000000 --- a/googletest/docs/pump_manual.md +++ /dev/null @@ -1,190 +0,0 @@ -Pump is Useful for Meta Programming. - -# The Problem - -Template and macro libraries often need to define many classes, functions, or -macros that vary only (or almost only) in the number of arguments they take. -It's a lot of repetitive, mechanical, and error-prone work. - -Variadic templates and variadic macros can alleviate the problem. However, while -both are being considered by the C++ committee, neither is in the standard yet -or widely supported by compilers. Thus they are often not a good choice, -especially when your code needs to be portable. And their capabilities are still -limited. - -As a result, authors of such libraries often have to write scripts to generate -their implementation. However, our experience is that it's tedious to write such -scripts, which tend to reflect the structure of the generated code poorly and -are often hard to read and edit. For example, a small change needed in the -generated code may require some non-intuitive, non-trivial changes in the -script. This is especially painful when experimenting with the code. - -# Our Solution - -Pump (for Pump is Useful for Meta Programming, Pretty Useful for Meta -Programming, or Practical Utility for Meta Programming, whichever you prefer) is -a simple meta-programming tool for C++. The idea is that a programmer writes a -`foo.pump` file which contains C++ code plus meta code that manipulates the C++ -code. The meta code can handle iterations over a range, nested iterations, local -meta variable definitions, simple arithmetic, and conditional expressions. You -can view it as a small Domain-Specific Language. The meta language is designed -to be non-intrusive (s.t. it won't confuse Emacs' C++ mode, for example) and -concise, making Pump code intuitive and easy to maintain. - -## Highlights - -* The implementation is in a single Python script and thus ultra portable: no - build or installation is needed and it works cross platforms. -* Pump tries to be smart with respect to - [Google's style guide](https://github.com/google/styleguide): it breaks long - lines (easy to have when they are generated) at acceptable places to fit - within 80 columns and indent the continuation lines correctly. -* The format is human-readable and more concise than XML. -* The format works relatively well with Emacs' C++ mode. - -## Examples - -The following Pump code (where meta keywords start with `$`, `[[` and `]]` are -meta brackets, and `$$` starts a meta comment that ends with the line): - -``` -$var n = 3 $$ Defines a meta variable n. -$range i 0..n $$ Declares the range of meta iterator i (inclusive). -$for i [[ - $$ Meta loop. -// Foo$i does blah for $i-ary predicates. -$range j 1..i -template -class Foo$i { -$if i == 0 [[ - blah a; -]] $elif i <= 2 [[ - blah b; -]] $else [[ - blah c; -]] -}; - -]] -``` - -will be translated by the Pump compiler to: - -```cpp -// Foo0 does blah for 0-ary predicates. -template -class Foo0 { - blah a; -}; - -// Foo1 does blah for 1-ary predicates. -template -class Foo1 { - blah b; -}; - -// Foo2 does blah for 2-ary predicates. -template -class Foo2 { - blah b; -}; - -// Foo3 does blah for 3-ary predicates. -template -class Foo3 { - blah c; -}; -``` - -In another example, - -``` -$range i 1..n -Func($for i + [[a$i]]); -$$ The text between i and [[ is the separator between iterations. -``` - -will generate one of the following lines (without the comments), depending on -the value of `n`: - -```cpp -Func(); // If n is 0. -Func(a1); // If n is 1. -Func(a1 + a2); // If n is 2. -Func(a1 + a2 + a3); // If n is 3. -// And so on... -``` - -## Constructs - -We support the following meta programming constructs: - -| `$var id = exp` | Defines a named constant value. `$id` is | -: : valid util the end of the current meta : -: : lexical block. : -| :------------------------------- | :--------------------------------------- | -| `$range id exp..exp` | Sets the range of an iteration variable, | -: : which can be reused in multiple loops : -: : later. : -| `$for id sep [[ code ]]` | Iteration. The range of `id` must have | -: : been defined earlier. `$id` is valid in : -: : `code`. : -| `$($)` | Generates a single `$` character. | -| `$id` | Value of the named constant or iteration | -: : variable. : -| `$(exp)` | Value of the expression. | -| `$if exp [[ code ]] else_branch` | Conditional. | -| `[[ code ]]` | Meta lexical block. | -| `cpp_code` | Raw C++ code. | -| `$$ comment` | Meta comment. | - -**Note:** To give the user some freedom in formatting the Pump source code, Pump -ignores a new-line character if it's right after `$for foo` or next to `[[` or -`]]`. Without this rule you'll often be forced to write very long lines to get -the desired output. Therefore sometimes you may need to insert an extra new-line -in such places for a new-line to show up in your output. - -## Grammar - -```ebnf -code ::= atomic_code* -atomic_code ::= $var id = exp - | $var id = [[ code ]] - | $range id exp..exp - | $for id sep [[ code ]] - | $($) - | $id - | $(exp) - | $if exp [[ code ]] else_branch - | [[ code ]] - | cpp_code -sep ::= cpp_code | empty_string -else_branch ::= $else [[ code ]] - | $elif exp [[ code ]] else_branch - | empty_string -exp ::= simple_expression_in_Python_syntax -``` - -## Code - -You can find the source code of Pump in [scripts/pump.py](../scripts/pump.py). -It is still very unpolished and lacks automated tests, although it has been -successfully used many times. If you find a chance to use it in your project, -please let us know what you think! We also welcome help on improving Pump. - -## Real Examples - -You can find real-world applications of Pump in -[Google Test](https://github.com/google/googletest/tree/master/googletest) and -[Google Mock](https://github.com/google/googletest/tree/master/googlemock). The -source file `foo.h.pump` generates `foo.h`. - -## Tips - -* If a meta variable is followed by a letter or digit, you can separate them - using `[[]]`, which inserts an empty string. For example `Foo$j[[]]Helper` - generate `Foo1Helper` when `j` is 1. -* To avoid extra-long Pump source lines, you can break a line anywhere you - want by inserting `[[]]` followed by a new line. Since any new-line - character next to `[[` or `]]` is ignored, the generated code won't contain - this new line. diff --git a/googletest/scripts/pump.py b/googletest/scripts/pump.py deleted file mode 100755 index 66e3217..0000000 --- a/googletest/scripts/pump.py +++ /dev/null @@ -1,855 +0,0 @@ -#!/usr/bin/env python2.7 -# -# Copyright 2008, Google Inc. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are -# met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following disclaimer -# in the documentation and/or other materials provided with the -# distribution. -# * Neither the name of Google Inc. nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -"""pump v0.2.0 - Pretty Useful for Meta Programming. - -A tool for preprocessor meta programming. Useful for generating -repetitive boilerplate code. Especially useful for writing C++ -classes, functions, macros, and templates that need to work with -various number of arguments. - -USAGE: - pump.py SOURCE_FILE - -EXAMPLES: - pump.py foo.cc.pump - Converts foo.cc.pump to foo.cc. - -GRAMMAR: - CODE ::= ATOMIC_CODE* - ATOMIC_CODE ::= $var ID = EXPRESSION - | $var ID = [[ CODE ]] - | $range ID EXPRESSION..EXPRESSION - | $for ID SEPARATOR [[ CODE ]] - | $($) - | $ID - | $(EXPRESSION) - | $if EXPRESSION [[ CODE ]] ELSE_BRANCH - | [[ CODE ]] - | RAW_CODE - SEPARATOR ::= RAW_CODE | EMPTY - ELSE_BRANCH ::= $else [[ CODE ]] - | $elif EXPRESSION [[ CODE ]] ELSE_BRANCH - | EMPTY - EXPRESSION has Python syntax. -""" - -from __future__ import print_function - -import os -import re -import sys - - -TOKEN_TABLE = [ - (re.compile(r'\$var\s+'), '$var'), - (re.compile(r'\$elif\s+'), '$elif'), - (re.compile(r'\$else\s+'), '$else'), - (re.compile(r'\$for\s+'), '$for'), - (re.compile(r'\$if\s+'), '$if'), - (re.compile(r'\$range\s+'), '$range'), - (re.compile(r'\$[_A-Za-z]\w*'), '$id'), - (re.compile(r'\$\(\$\)'), '$($)'), - (re.compile(r'\$'), '$'), - (re.compile(r'\[\[\n?'), '[['), - (re.compile(r'\]\]\n?'), ']]'), - ] - - -class Cursor: - """Represents a position (line and column) in a text file.""" - - def __init__(self, line=-1, column=-1): - self.line = line - self.column = column - - def __eq__(self, rhs): - return self.line == rhs.line and self.column == rhs.column - - def __ne__(self, rhs): - return not self == rhs - - def __lt__(self, rhs): - return self.line < rhs.line or ( - self.line == rhs.line and self.column < rhs.column) - - def __le__(self, rhs): - return self < rhs or self == rhs - - def __gt__(self, rhs): - return rhs < self - - def __ge__(self, rhs): - return rhs <= self - - def __str__(self): - if self == Eof(): - return 'EOF' - else: - return '%s(%s)' % (self.line + 1, self.column) - - def __add__(self, offset): - return Cursor(self.line, self.column + offset) - - def __sub__(self, offset): - return Cursor(self.line, self.column - offset) - - def Clone(self): - """Returns a copy of self.""" - - return Cursor(self.line, self.column) - - -# Special cursor to indicate the end-of-file. -def Eof(): - """Returns the special cursor to denote the end-of-file.""" - return Cursor(-1, -1) - - -class Token: - """Represents a token in a Pump source file.""" - - def __init__(self, start=None, end=None, value=None, token_type=None): - if start is None: - self.start = Eof() - else: - self.start = start - if end is None: - self.end = Eof() - else: - self.end = end - self.value = value - self.token_type = token_type - - def __str__(self): - return 'Token @%s: \'%s\' type=%s' % ( - self.start, self.value, self.token_type) - - def Clone(self): - """Returns a copy of self.""" - - return Token(self.start.Clone(), self.end.Clone(), self.value, - self.token_type) - - -def StartsWith(lines, pos, string): - """Returns True iff the given position in lines starts with 'string'.""" - - return lines[pos.line][pos.column:].startswith(string) - - -def FindFirstInLine(line, token_table): - best_match_start = -1 - for (regex, token_type) in token_table: - m = regex.search(line) - if m: - # We found regex in lines - if best_match_start < 0 or m.start() < best_match_start: - best_match_start = m.start() - best_match_length = m.end() - m.start() - best_match_token_type = token_type - - if best_match_start < 0: - return None - - return (best_match_start, best_match_length, best_match_token_type) - - -def FindFirst(lines, token_table, cursor): - """Finds the first occurrence of any string in strings in lines.""" - - start = cursor.Clone() - cur_line_number = cursor.line - for line in lines[start.line:]: - if cur_line_number == start.line: - line = line[start.column:] - m = FindFirstInLine(line, token_table) - if m: - # We found a regex in line. - (start_column, length, token_type) = m - if cur_line_number == start.line: - start_column += start.column - found_start = Cursor(cur_line_number, start_column) - found_end = found_start + length - return MakeToken(lines, found_start, found_end, token_type) - cur_line_number += 1 - # We failed to find str in lines - return None - - -def SubString(lines, start, end): - """Returns a substring in lines.""" - - if end == Eof(): - end = Cursor(len(lines) - 1, len(lines[-1])) - - if start >= end: - return '' - - if start.line == end.line: - return lines[start.line][start.column:end.column] - - result_lines = ([lines[start.line][start.column:]] + - lines[start.line + 1:end.line] + - [lines[end.line][:end.column]]) - return ''.join(result_lines) - - -def StripMetaComments(str): - """Strip meta comments from each line in the given string.""" - - # First, completely remove lines containing nothing but a meta - # comment, including the trailing \n. - str = re.sub(r'^\s*\$\$.*\n', '', str) - - # Then, remove meta comments from contentful lines. - return re.sub(r'\s*\$\$.*', '', str) - - -def MakeToken(lines, start, end, token_type): - """Creates a new instance of Token.""" - - return Token(start, end, SubString(lines, start, end), token_type) - - -def ParseToken(lines, pos, regex, token_type): - line = lines[pos.line][pos.column:] - m = regex.search(line) - if m and not m.start(): - return MakeToken(lines, pos, pos + m.end(), token_type) - else: - print('ERROR: %s expected at %s.' % (token_type, pos)) - sys.exit(1) - - -ID_REGEX = re.compile(r'[_A-Za-z]\w*') -EQ_REGEX = re.compile(r'=') -REST_OF_LINE_REGEX = re.compile(r'.*?(?=$|\$\$)') -OPTIONAL_WHITE_SPACES_REGEX = re.compile(r'\s*') -WHITE_SPACE_REGEX = re.compile(r'\s') -DOT_DOT_REGEX = re.compile(r'\.\.') - - -def Skip(lines, pos, regex): - line = lines[pos.line][pos.column:] - m = re.search(regex, line) - if m and not m.start(): - return pos + m.end() - else: - return pos - - -def SkipUntil(lines, pos, regex, token_type): - line = lines[pos.line][pos.column:] - m = re.search(regex, line) - if m: - return pos + m.start() - else: - print ('ERROR: %s expected on line %s after column %s.' % - (token_type, pos.line + 1, pos.column)) - sys.exit(1) - - -def ParseExpTokenInParens(lines, pos): - def ParseInParens(pos): - pos = Skip(lines, pos, OPTIONAL_WHITE_SPACES_REGEX) - pos = Skip(lines, pos, r'\(') - pos = Parse(pos) - pos = Skip(lines, pos, r'\)') - return pos - - def Parse(pos): - pos = SkipUntil(lines, pos, r'\(|\)', ')') - if SubString(lines, pos, pos + 1) == '(': - pos = Parse(pos + 1) - pos = Skip(lines, pos, r'\)') - return Parse(pos) - else: - return pos - - start = pos.Clone() - pos = ParseInParens(pos) - return MakeToken(lines, start, pos, 'exp') - - -def RStripNewLineFromToken(token): - if token.value.endswith('\n'): - return Token(token.start, token.end, token.value[:-1], token.token_type) - else: - return token - - -def TokenizeLines(lines, pos): - while True: - found = FindFirst(lines, TOKEN_TABLE, pos) - if not found: - yield MakeToken(lines, pos, Eof(), 'code') - return - - if found.start == pos: - prev_token = None - prev_token_rstripped = None - else: - prev_token = MakeToken(lines, pos, found.start, 'code') - prev_token_rstripped = RStripNewLineFromToken(prev_token) - - if found.token_type == '$var': - if prev_token_rstripped: - yield prev_token_rstripped - yield found - id_token = ParseToken(lines, found.end, ID_REGEX, 'id') - yield id_token - pos = Skip(lines, id_token.end, OPTIONAL_WHITE_SPACES_REGEX) - - eq_token = ParseToken(lines, pos, EQ_REGEX, '=') - yield eq_token - pos = Skip(lines, eq_token.end, r'\s*') - - if SubString(lines, pos, pos + 2) != '[[': - exp_token = ParseToken(lines, pos, REST_OF_LINE_REGEX, 'exp') - yield exp_token - pos = Cursor(exp_token.end.line + 1, 0) - elif found.token_type == '$for': - if prev_token_rstripped: - yield prev_token_rstripped - yield found - id_token = ParseToken(lines, found.end, ID_REGEX, 'id') - yield id_token - pos = Skip(lines, id_token.end, WHITE_SPACE_REGEX) - elif found.token_type == '$range': - if prev_token_rstripped: - yield prev_token_rstripped - yield found - id_token = ParseToken(lines, found.end, ID_REGEX, 'id') - yield id_token - pos = Skip(lines, id_token.end, OPTIONAL_WHITE_SPACES_REGEX) - - dots_pos = SkipUntil(lines, pos, DOT_DOT_REGEX, '..') - yield MakeToken(lines, pos, dots_pos, 'exp') - yield MakeToken(lines, dots_pos, dots_pos + 2, '..') - pos = dots_pos + 2 - new_pos = Cursor(pos.line + 1, 0) - yield MakeToken(lines, pos, new_pos, 'exp') - pos = new_pos - elif found.token_type == '$': - if prev_token: - yield prev_token - yield found - exp_token = ParseExpTokenInParens(lines, found.end) - yield exp_token - pos = exp_token.end - elif (found.token_type == ']]' or found.token_type == '$if' or - found.token_type == '$elif' or found.token_type == '$else'): - if prev_token_rstripped: - yield prev_token_rstripped - yield found - pos = found.end - else: - if prev_token: - yield prev_token - yield found - pos = found.end - - -def Tokenize(s): - """A generator that yields the tokens in the given string.""" - if s != '': - lines = s.splitlines(True) - for token in TokenizeLines(lines, Cursor(0, 0)): - yield token - - -class CodeNode: - def __init__(self, atomic_code_list=None): - self.atomic_code = atomic_code_list - - -class VarNode: - def __init__(self, identifier=None, atomic_code=None): - self.identifier = identifier - self.atomic_code = atomic_code - - -class RangeNode: - def __init__(self, identifier=None, exp1=None, exp2=None): - self.identifier = identifier - self.exp1 = exp1 - self.exp2 = exp2 - - -class ForNode: - def __init__(self, identifier=None, sep=None, code=None): - self.identifier = identifier - self.sep = sep - self.code = code - - -class ElseNode: - def __init__(self, else_branch=None): - self.else_branch = else_branch - - -class IfNode: - def __init__(self, exp=None, then_branch=None, else_branch=None): - self.exp = exp - self.then_branch = then_branch - self.else_branch = else_branch - - -class RawCodeNode: - def __init__(self, token=None): - self.raw_code = token - - -class LiteralDollarNode: - def __init__(self, token): - self.token = token - - -class ExpNode: - def __init__(self, token, python_exp): - self.token = token - self.python_exp = python_exp - - -def PopFront(a_list): - head = a_list[0] - a_list[:1] = [] - return head - - -def PushFront(a_list, elem): - a_list[:0] = [elem] - - -def PopToken(a_list, token_type=None): - token = PopFront(a_list) - if token_type is not None and token.token_type != token_type: - print('ERROR: %s expected at %s' % (token_type, token.start)) - print('ERROR: %s found instead' % (token,)) - sys.exit(1) - - return token - - -def PeekToken(a_list): - if not a_list: - return None - - return a_list[0] - - -def ParseExpNode(token): - python_exp = re.sub(r'([_A-Za-z]\w*)', r'self.GetValue("\1")', token.value) - return ExpNode(token, python_exp) - - -def ParseElseNode(tokens): - def Pop(token_type=None): - return PopToken(tokens, token_type) - - next = PeekToken(tokens) - if not next: - return None - if next.token_type == '$else': - Pop('$else') - Pop('[[') - code_node = ParseCodeNode(tokens) - Pop(']]') - return code_node - elif next.token_type == '$elif': - Pop('$elif') - exp = Pop('code') - Pop('[[') - code_node = ParseCodeNode(tokens) - Pop(']]') - inner_else_node = ParseElseNode(tokens) - return CodeNode([IfNode(ParseExpNode(exp), code_node, inner_else_node)]) - elif not next.value.strip(): - Pop('code') - return ParseElseNode(tokens) - else: - return None - - -def ParseAtomicCodeNode(tokens): - def Pop(token_type=None): - return PopToken(tokens, token_type) - - head = PopFront(tokens) - t = head.token_type - if t == 'code': - return RawCodeNode(head) - elif t == '$var': - id_token = Pop('id') - Pop('=') - next = PeekToken(tokens) - if next.token_type == 'exp': - exp_token = Pop() - return VarNode(id_token, ParseExpNode(exp_token)) - Pop('[[') - code_node = ParseCodeNode(tokens) - Pop(']]') - return VarNode(id_token, code_node) - elif t == '$for': - id_token = Pop('id') - next_token = PeekToken(tokens) - if next_token.token_type == 'code': - sep_token = next_token - Pop('code') - else: - sep_token = None - Pop('[[') - code_node = ParseCodeNode(tokens) - Pop(']]') - return ForNode(id_token, sep_token, code_node) - elif t == '$if': - exp_token = Pop('code') - Pop('[[') - code_node = ParseCodeNode(tokens) - Pop(']]') - else_node = ParseElseNode(tokens) - return IfNode(ParseExpNode(exp_token), code_node, else_node) - elif t == '$range': - id_token = Pop('id') - exp1_token = Pop('exp') - Pop('..') - exp2_token = Pop('exp') - return RangeNode(id_token, ParseExpNode(exp1_token), - ParseExpNode(exp2_token)) - elif t == '$id': - return ParseExpNode(Token(head.start + 1, head.end, head.value[1:], 'id')) - elif t == '$($)': - return LiteralDollarNode(head) - elif t == '$': - exp_token = Pop('exp') - return ParseExpNode(exp_token) - elif t == '[[': - code_node = ParseCodeNode(tokens) - Pop(']]') - return code_node - else: - PushFront(tokens, head) - return None - - -def ParseCodeNode(tokens): - atomic_code_list = [] - while True: - if not tokens: - break - atomic_code_node = ParseAtomicCodeNode(tokens) - if atomic_code_node: - atomic_code_list.append(atomic_code_node) - else: - break - return CodeNode(atomic_code_list) - - -def ParseToAST(pump_src_text): - """Convert the given Pump source text into an AST.""" - tokens = list(Tokenize(pump_src_text)) - code_node = ParseCodeNode(tokens) - return code_node - - -class Env: - def __init__(self): - self.variables = [] - self.ranges = [] - - def Clone(self): - clone = Env() - clone.variables = self.variables[:] - clone.ranges = self.ranges[:] - return clone - - def PushVariable(self, var, value): - # If value looks like an int, store it as an int. - try: - int_value = int(value) - if ('%s' % int_value) == value: - value = int_value - except Exception: - pass - self.variables[:0] = [(var, value)] - - def PopVariable(self): - self.variables[:1] = [] - - def PushRange(self, var, lower, upper): - self.ranges[:0] = [(var, lower, upper)] - - def PopRange(self): - self.ranges[:1] = [] - - def GetValue(self, identifier): - for (var, value) in self.variables: - if identifier == var: - return value - - print('ERROR: meta variable %s is undefined.' % (identifier,)) - sys.exit(1) - - def EvalExp(self, exp): - try: - result = eval(exp.python_exp) - except Exception as e: # pylint: disable=broad-except - print('ERROR: caught exception %s: %s' % (e.__class__.__name__, e)) - print('ERROR: failed to evaluate meta expression %s at %s' % - (exp.python_exp, exp.token.start)) - sys.exit(1) - return result - - def GetRange(self, identifier): - for (var, lower, upper) in self.ranges: - if identifier == var: - return (lower, upper) - - print('ERROR: range %s is undefined.' % (identifier,)) - sys.exit(1) - - -class Output: - def __init__(self): - self.string = '' - - def GetLastLine(self): - index = self.string.rfind('\n') - if index < 0: - return '' - - return self.string[index + 1:] - - def Append(self, s): - self.string += s - - -def RunAtomicCode(env, node, output): - if isinstance(node, VarNode): - identifier = node.identifier.value.strip() - result = Output() - RunAtomicCode(env.Clone(), node.atomic_code, result) - value = result.string - env.PushVariable(identifier, value) - elif isinstance(node, RangeNode): - identifier = node.identifier.value.strip() - lower = int(env.EvalExp(node.exp1)) - upper = int(env.EvalExp(node.exp2)) - env.PushRange(identifier, lower, upper) - elif isinstance(node, ForNode): - identifier = node.identifier.value.strip() - if node.sep is None: - sep = '' - else: - sep = node.sep.value - (lower, upper) = env.GetRange(identifier) - for i in range(lower, upper + 1): - new_env = env.Clone() - new_env.PushVariable(identifier, i) - RunCode(new_env, node.code, output) - if i != upper: - output.Append(sep) - elif isinstance(node, RawCodeNode): - output.Append(node.raw_code.value) - elif isinstance(node, IfNode): - cond = env.EvalExp(node.exp) - if cond: - RunCode(env.Clone(), node.then_branch, output) - elif node.else_branch is not None: - RunCode(env.Clone(), node.else_branch, output) - elif isinstance(node, ExpNode): - value = env.EvalExp(node) - output.Append('%s' % (value,)) - elif isinstance(node, LiteralDollarNode): - output.Append('$') - elif isinstance(node, CodeNode): - RunCode(env.Clone(), node, output) - else: - print('BAD') - print(node) - sys.exit(1) - - -def RunCode(env, code_node, output): - for atomic_code in code_node.atomic_code: - RunAtomicCode(env, atomic_code, output) - - -def IsSingleLineComment(cur_line): - return '//' in cur_line - - -def IsInPreprocessorDirective(prev_lines, cur_line): - if cur_line.lstrip().startswith('#'): - return True - return prev_lines and prev_lines[-1].endswith('\\') - - -def WrapComment(line, output): - loc = line.find('//') - before_comment = line[:loc].rstrip() - if before_comment == '': - indent = loc - else: - output.append(before_comment) - indent = len(before_comment) - len(before_comment.lstrip()) - prefix = indent*' ' + '// ' - max_len = 80 - len(prefix) - comment = line[loc + 2:].strip() - segs = [seg for seg in re.split(r'(\w+\W*)', comment) if seg != ''] - cur_line = '' - for seg in segs: - if len((cur_line + seg).rstrip()) < max_len: - cur_line += seg - else: - if cur_line.strip() != '': - output.append(prefix + cur_line.rstrip()) - cur_line = seg.lstrip() - if cur_line.strip() != '': - output.append(prefix + cur_line.strip()) - - -def WrapCode(line, line_concat, output): - indent = len(line) - len(line.lstrip()) - prefix = indent*' ' # Prefix of the current line - max_len = 80 - indent - len(line_concat) # Maximum length of the current line - new_prefix = prefix + 4*' ' # Prefix of a continuation line - new_max_len = max_len - 4 # Maximum length of a continuation line - # Prefers to wrap a line after a ',' or ';'. - segs = [seg for seg in re.split(r'([^,;]+[,;]?)', line.strip()) if seg != ''] - cur_line = '' # The current line without leading spaces. - for seg in segs: - # If the line is still too long, wrap at a space. - while cur_line == '' and len(seg.strip()) > max_len: - seg = seg.lstrip() - split_at = seg.rfind(' ', 0, max_len) - output.append(prefix + seg[:split_at].strip() + line_concat) - seg = seg[split_at + 1:] - prefix = new_prefix - max_len = new_max_len - - if len((cur_line + seg).rstrip()) < max_len: - cur_line = (cur_line + seg).lstrip() - else: - output.append(prefix + cur_line.rstrip() + line_concat) - prefix = new_prefix - max_len = new_max_len - cur_line = seg.lstrip() - if cur_line.strip() != '': - output.append(prefix + cur_line.strip()) - - -def WrapPreprocessorDirective(line, output): - WrapCode(line, ' \\', output) - - -def WrapPlainCode(line, output): - WrapCode(line, '', output) - - -def IsMultiLineIWYUPragma(line): - return re.search(r'/\* IWYU pragma: ', line) - - -def IsHeaderGuardIncludeOrOneLineIWYUPragma(line): - return (re.match(r'^#(ifndef|define|endif\s*//)\s*[\w_]+\s*$', line) or - re.match(r'^#include\s', line) or - # Don't break IWYU pragmas, either; that causes iwyu.py problems. - re.search(r'// IWYU pragma: ', line)) - - -def WrapLongLine(line, output): - line = line.rstrip() - if len(line) <= 80: - output.append(line) - elif IsSingleLineComment(line): - if IsHeaderGuardIncludeOrOneLineIWYUPragma(line): - # The style guide made an exception to allow long header guard lines, - # includes and IWYU pragmas. - output.append(line) - else: - WrapComment(line, output) - elif IsInPreprocessorDirective(output, line): - if IsHeaderGuardIncludeOrOneLineIWYUPragma(line): - # The style guide made an exception to allow long header guard lines, - # includes and IWYU pragmas. - output.append(line) - else: - WrapPreprocessorDirective(line, output) - elif IsMultiLineIWYUPragma(line): - output.append(line) - else: - WrapPlainCode(line, output) - - -def BeautifyCode(string): - lines = string.splitlines() - output = [] - for line in lines: - WrapLongLine(line, output) - output2 = [line.rstrip() for line in output] - return '\n'.join(output2) + '\n' - - -def ConvertFromPumpSource(src_text): - """Return the text generated from the given Pump source text.""" - ast = ParseToAST(StripMetaComments(src_text)) - output = Output() - RunCode(Env(), ast, output) - return BeautifyCode(output.string) - - -def main(argv): - if len(argv) == 1: - print(__doc__) - sys.exit(1) - - file_path = argv[-1] - output_str = ConvertFromPumpSource(file(file_path, 'r').read()) - if file_path.endswith('.pump'): - output_file_path = file_path[:-5] - else: - output_file_path = '-' - if output_file_path == '-': - print(output_str,) - else: - output_file = file(output_file_path, 'w') - output_file.write('// This file was GENERATED by command:\n') - output_file.write('// %s %s\n' % - (os.path.basename(__file__), os.path.basename(file_path))) - output_file.write('// DO NOT EDIT BY HAND!!!\n\n') - output_file.write(output_str) - output_file.close() - - -if __name__ == '__main__': - main(sys.argv) -- cgit v0.12