diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2002-11-22 08:36:54 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2002-11-22 08:36:54 (GMT) |
commit | 0d1fdea8efc48560d90374d8b785aee26ae82b70 (patch) | |
tree | 71e9606a5afcba430c6dd2a16e1ee9d0335d30e6 /Tools/i18n/pygettext.py | |
parent | 4581cfa326cf7d8b9d7888d4c0e96ee88950bcfa (diff) | |
download | cpython-0d1fdea8efc48560d90374d8b785aee26ae82b70.zip cpython-0d1fdea8efc48560d90374d8b785aee26ae82b70.tar.gz cpython-0d1fdea8efc48560d90374d8b785aee26ae82b70.tar.bz2 |
Patch #494845: Support string concatenation, detect non-string data,
add globbing support, find modules by name instead of by file.
Diffstat (limited to 'Tools/i18n/pygettext.py')
-rwxr-xr-x | Tools/i18n/pygettext.py | 200 |
1 files changed, 163 insertions, 37 deletions
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index b40bda1..d0320cf 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -1,31 +1,49 @@ #! /usr/bin/env python +# -*- coding: iso-8859-1 -*- # Originally written by Barry Warsaw <barry@zope.com> # # Minimally patched to make it even more xgettext compatible # by Peter Funk <pf@artcom-gmbh.de> +# +# 2002-11-22 Jürgen Hermann <jh@web.de> +# Added checks that _() only contains string literals, and +# command line args are resolved to module lists, i.e. you +# can now pass a filename, a module or package name, or a +# directory (including globbing chars, important for Win32). +# Made docstring fit in 80 chars wide displays using pydoc. +# -"""pygettext -- Python equivalent of xgettext(1) +# for selftesting +try: + import fintl + _ = fintl.gettext +except ImportError: + _ = lambda s: s + +__doc__ = _("""pygettext -- Python equivalent of xgettext(1) Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the -internationalization of C programs. Most of these tools are independent of -the programming language and can be used from within Python programs. Martin -von Loewis' work[1] helps considerably in this regard. +internationalization of C programs. Most of these tools are independent of +the programming language and can be used from within Python programs. +Martin von Loewis' work[1] helps considerably in this regard. There's one problem though; xgettext is the program that scans source code -looking for message strings, but it groks only C (or C++). Python introduces -a few wrinkles, such as dual quoting characters, triple quoted strings, and -raw strings. xgettext understands none of this. - -Enter pygettext, which uses Python's standard tokenize module to scan Python -source code, generating .pot files identical to what GNU xgettext[2] generates -for C and C++ code. From there, the standard GNU tools can be used. - -A word about marking Python strings as candidates for translation. GNU -xgettext recognizes the following keywords: gettext, dgettext, dcgettext, and -gettext_noop. But those can be a lot of text to include all over your code. -C and C++ have a trick: they use the C preprocessor. Most internationalized C -source includes a #define for gettext() to _() so that what has to be written -in the source is much less. Thus these are both translatable strings: +looking for message strings, but it groks only C (or C++). Python +introduces a few wrinkles, such as dual quoting characters, triple quoted +strings, and raw strings. xgettext understands none of this. + +Enter pygettext, which uses Python's standard tokenize module to scan +Python source code, generating .pot files identical to what GNU xgettext[2] +generates for C and C++ code. From there, the standard GNU tools can be +used. + +A word about marking Python strings as candidates for translation. GNU +xgettext recognizes the following keywords: gettext, dgettext, dcgettext, +and gettext_noop. But those can be a lot of text to include all over your +code. C and C++ have a trick: they use the C preprocessor. Most +internationalized C source includes a #define for gettext() to _() so that +what has to be written in the source is much less. Thus these are both +translatable strings: gettext("Translatable String") _("Translatable String") @@ -37,11 +55,11 @@ below for how to augment this. [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html [2] http://www.gnu.org/software/gettext/gettext.html -NOTE: pygettext attempts to be option and feature compatible with GNU xgettext -where ever possible. However some options are still missing or are not fully -implemented. Also, xgettext's use of command line switches with option -arguments is broken, and in these cases, pygettext just defines additional -switches. +NOTE: pygettext attempts to be option and feature compatible with GNU +xgettext where ever possible. However some options are still missing or are +not fully implemented. Also, xgettext's use of command line switches with +option arguments is broken, and in these cases, pygettext just defines +additional switches. Usage: pygettext [options] inputfile ... @@ -61,9 +79,9 @@ Options: -D --docstrings - Extract module, class, method, and function docstrings. These do not - need to be wrapped in _() markers, and in fact cannot be for Python to - consider them docstrings. (See also the -X option). + Extract module, class, method, and function docstrings. These do + not need to be wrapped in _() markers, and in fact cannot be for + Python to consider them docstrings. (See also the -X option). -h --help @@ -135,23 +153,17 @@ Options: conjunction with the -D option above. If `inputfile' is -, standard input is read. -""" +""") import os import sys import time import getopt +import token import tokenize import operator -# for selftesting -try: - import fintl - _ = fintl.gettext -except ImportError: - def _(s): return s - -__version__ = '1.4' +__version__ = '1.5' default_keywords = ['_'] DEFAULTKEYWORDS = ', '.join(default_keywords) @@ -183,7 +195,7 @@ msgstr "" def usage(code, msg=''): - print >> sys.stderr, _(__doc__) % globals() + print >> sys.stderr, __doc__ % globals() if msg: print >> sys.stderr, msg sys.exit(code) @@ -242,6 +254,103 @@ def normalize(s): s = '""\n"' + lineterm.join(lines) + '"' return s + +def containsAny(str, set): + """ Check whether 'str' contains ANY of the chars in 'set' + """ + return 1 in [c in str for c in set] + + +def _visit_pyfiles(list, dirname, names): + """ Helper for getFilesForName(). + """ + # get extension for python source files + if not globals().has_key('_py_ext'): + import imp + global _py_ext + _py_ext = [triple[0] for triple in imp.get_suffixes() if triple[2] == imp.PY_SOURCE][0] + + # don't recurse into CVS directories + if 'CVS' in names: + names.remove('CVS') + + # add all *.py files to list + list.extend( + [os.path.join(dirname, file) + for file in names + if os.path.splitext(file)[1] == _py_ext]) + + +def _get_modpkg_path(dotted_name, pathlist=None): + """ Get the filesystem path for a module or a package. + + Return the file system path to a file for a module, + and to a directory for a package. Return None if + the name is not found, or is a builtin or extension module. + """ + import imp + + # split off top-most name + parts = dotted_name.split('.', 1) + + if len(parts) > 1: + # we have a dotted path, import top-level package + try: + file, pathname, description = imp.find_module(parts[0], pathlist) + if file: file.close() + except ImportError: + return None + + # check if it's indeed a package + if description[2] == imp.PKG_DIRECTORY: + # recursively handle the remaining name parts + pathname = _get_modpkg_path(parts[1], [pathname]) + else: + pathname = None + else: + # plain name + try: + file, pathname, description = imp.find_module(dotted_name, pathlist) + if file: file.close() + if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]: + pathname = None + except ImportError: + pathname = None + + return pathname + + +def getFilesForName(name): + """ Get a list of module files for a filename, a module or package name, + or a directory. + """ + import imp + + if not os.path.exists(name): + # check for glob chars + if containsAny(name, "*?[]"): + import glob + files = glob.glob(name) + list = [] + for file in files: + list.extend(getFilesForName(file)) + return list + + # try to find module or package + name = _get_modpkg_path(name) + if not name: + return [] + + if os.path.isdir(name): + # find all python files in directory + list = [] + os.path.walk(name, _visit_pyfiles, list) + return list + elif os.path.exists(name): + # a single file + return [name] + + return [] class TokenEater: @@ -314,7 +423,12 @@ class TokenEater: self.__state = self.__waiting elif ttype == tokenize.STRING: self.__data.append(safe_eval(tstring)) - # TBD: should we warn if we seen anything else? + elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT, + token.NEWLINE, tokenize.NL]: + # warn if we see anything else than STRING or whitespace + print >>sys.stderr, _('*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"') % { + 'token': tstring, 'file': self.__curfile, 'lineno': self.__lineno} + self.__state = self.__waiting def __addentry(self, msg, lineno=None, isdocstring=0): if lineno is None: @@ -495,6 +609,15 @@ def main(): else: options.toexclude = [] + # resolve args to module lists + expanded = [] + for arg in args: + if arg == '-': + expanded.append(arg) + else: + expanded.extend(getFilesForName(arg)) + args = expanded + # slurp through all the files eater = TokenEater(options) for filename in args: @@ -539,3 +662,6 @@ if __name__ == '__main__': main() # some more test strings _(u'a unicode string') + _('*** Seen unexpected token "%(token)s"' % {'token': 'test'}) # this one creates a warning + _('more' 'than' 'one' 'string') + |