summaryrefslogtreecommitdiffstats
path: root/Tools/i18n/pygettext.py
diff options
context:
space:
mode:
authorMartin v. Löwis <martin@v.loewis.de>2002-11-22 08:36:54 (GMT)
committerMartin v. Löwis <martin@v.loewis.de>2002-11-22 08:36:54 (GMT)
commit0d1fdea8efc48560d90374d8b785aee26ae82b70 (patch)
tree71e9606a5afcba430c6dd2a16e1ee9d0335d30e6 /Tools/i18n/pygettext.py
parent4581cfa326cf7d8b9d7888d4c0e96ee88950bcfa (diff)
downloadcpython-0d1fdea8efc48560d90374d8b785aee26ae82b70.zip
cpython-0d1fdea8efc48560d90374d8b785aee26ae82b70.tar.gz
cpython-0d1fdea8efc48560d90374d8b785aee26ae82b70.tar.bz2
Patch #494845: Support string concatenation, detect non-string data,
add globbing support, find modules by name instead of by file.
Diffstat (limited to 'Tools/i18n/pygettext.py')
-rwxr-xr-xTools/i18n/pygettext.py200
1 files changed, 163 insertions, 37 deletions
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index b40bda1..d0320cf 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -1,31 +1,49 @@
#! /usr/bin/env python
+# -*- coding: iso-8859-1 -*-
# Originally written by Barry Warsaw <barry@zope.com>
#
# Minimally patched to make it even more xgettext compatible
# by Peter Funk <pf@artcom-gmbh.de>
+#
+# 2002-11-22 Jürgen Hermann <jh@web.de>
+# Added checks that _() only contains string literals, and
+# command line args are resolved to module lists, i.e. you
+# can now pass a filename, a module or package name, or a
+# directory (including globbing chars, important for Win32).
+# Made docstring fit in 80 chars wide displays using pydoc.
+#
-"""pygettext -- Python equivalent of xgettext(1)
+# for selftesting
+try:
+ import fintl
+ _ = fintl.gettext
+except ImportError:
+ _ = lambda s: s
+
+__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
-internationalization of C programs. Most of these tools are independent of
-the programming language and can be used from within Python programs. Martin
-von Loewis' work[1] helps considerably in this regard.
+internationalization of C programs. Most of these tools are independent of
+the programming language and can be used from within Python programs.
+Martin von Loewis' work[1] helps considerably in this regard.
There's one problem though; xgettext is the program that scans source code
-looking for message strings, but it groks only C (or C++). Python introduces
-a few wrinkles, such as dual quoting characters, triple quoted strings, and
-raw strings. xgettext understands none of this.
-
-Enter pygettext, which uses Python's standard tokenize module to scan Python
-source code, generating .pot files identical to what GNU xgettext[2] generates
-for C and C++ code. From there, the standard GNU tools can be used.
-
-A word about marking Python strings as candidates for translation. GNU
-xgettext recognizes the following keywords: gettext, dgettext, dcgettext, and
-gettext_noop. But those can be a lot of text to include all over your code.
-C and C++ have a trick: they use the C preprocessor. Most internationalized C
-source includes a #define for gettext() to _() so that what has to be written
-in the source is much less. Thus these are both translatable strings:
+looking for message strings, but it groks only C (or C++). Python
+introduces a few wrinkles, such as dual quoting characters, triple quoted
+strings, and raw strings. xgettext understands none of this.
+
+Enter pygettext, which uses Python's standard tokenize module to scan
+Python source code, generating .pot files identical to what GNU xgettext[2]
+generates for C and C++ code. From there, the standard GNU tools can be
+used.
+
+A word about marking Python strings as candidates for translation. GNU
+xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
+and gettext_noop. But those can be a lot of text to include all over your
+code. C and C++ have a trick: they use the C preprocessor. Most
+internationalized C source includes a #define for gettext() to _() so that
+what has to be written in the source is much less. Thus these are both
+translatable strings:
gettext("Translatable String")
_("Translatable String")
@@ -37,11 +55,11 @@ below for how to augment this.
[1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
[2] http://www.gnu.org/software/gettext/gettext.html
-NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
-where ever possible. However some options are still missing or are not fully
-implemented. Also, xgettext's use of command line switches with option
-arguments is broken, and in these cases, pygettext just defines additional
-switches.
+NOTE: pygettext attempts to be option and feature compatible with GNU
+xgettext where ever possible. However some options are still missing or are
+not fully implemented. Also, xgettext's use of command line switches with
+option arguments is broken, and in these cases, pygettext just defines
+additional switches.
Usage: pygettext [options] inputfile ...
@@ -61,9 +79,9 @@ Options:
-D
--docstrings
- Extract module, class, method, and function docstrings. These do not
- need to be wrapped in _() markers, and in fact cannot be for Python to
- consider them docstrings. (See also the -X option).
+ Extract module, class, method, and function docstrings. These do
+ not need to be wrapped in _() markers, and in fact cannot be for
+ Python to consider them docstrings. (See also the -X option).
-h
--help
@@ -135,23 +153,17 @@ Options:
conjunction with the -D option above.
If `inputfile' is -, standard input is read.
-"""
+""")
import os
import sys
import time
import getopt
+import token
import tokenize
import operator
-# for selftesting
-try:
- import fintl
- _ = fintl.gettext
-except ImportError:
- def _(s): return s
-
-__version__ = '1.4'
+__version__ = '1.5'
default_keywords = ['_']
DEFAULTKEYWORDS = ', '.join(default_keywords)
@@ -183,7 +195,7 @@ msgstr ""
def usage(code, msg=''):
- print >> sys.stderr, _(__doc__) % globals()
+ print >> sys.stderr, __doc__ % globals()
if msg:
print >> sys.stderr, msg
sys.exit(code)
@@ -242,6 +254,103 @@ def normalize(s):
s = '""\n"' + lineterm.join(lines) + '"'
return s
+
+def containsAny(str, set):
+ """ Check whether 'str' contains ANY of the chars in 'set'
+ """
+ return 1 in [c in str for c in set]
+
+
+def _visit_pyfiles(list, dirname, names):
+ """ Helper for getFilesForName().
+ """
+ # get extension for python source files
+ if not globals().has_key('_py_ext'):
+ import imp
+ global _py_ext
+ _py_ext = [triple[0] for triple in imp.get_suffixes() if triple[2] == imp.PY_SOURCE][0]
+
+ # don't recurse into CVS directories
+ if 'CVS' in names:
+ names.remove('CVS')
+
+ # add all *.py files to list
+ list.extend(
+ [os.path.join(dirname, file)
+ for file in names
+ if os.path.splitext(file)[1] == _py_ext])
+
+
+def _get_modpkg_path(dotted_name, pathlist=None):
+ """ Get the filesystem path for a module or a package.
+
+ Return the file system path to a file for a module,
+ and to a directory for a package. Return None if
+ the name is not found, or is a builtin or extension module.
+ """
+ import imp
+
+ # split off top-most name
+ parts = dotted_name.split('.', 1)
+
+ if len(parts) > 1:
+ # we have a dotted path, import top-level package
+ try:
+ file, pathname, description = imp.find_module(parts[0], pathlist)
+ if file: file.close()
+ except ImportError:
+ return None
+
+ # check if it's indeed a package
+ if description[2] == imp.PKG_DIRECTORY:
+ # recursively handle the remaining name parts
+ pathname = _get_modpkg_path(parts[1], [pathname])
+ else:
+ pathname = None
+ else:
+ # plain name
+ try:
+ file, pathname, description = imp.find_module(dotted_name, pathlist)
+ if file: file.close()
+ if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
+ pathname = None
+ except ImportError:
+ pathname = None
+
+ return pathname
+
+
+def getFilesForName(name):
+ """ Get a list of module files for a filename, a module or package name,
+ or a directory.
+ """
+ import imp
+
+ if not os.path.exists(name):
+ # check for glob chars
+ if containsAny(name, "*?[]"):
+ import glob
+ files = glob.glob(name)
+ list = []
+ for file in files:
+ list.extend(getFilesForName(file))
+ return list
+
+ # try to find module or package
+ name = _get_modpkg_path(name)
+ if not name:
+ return []
+
+ if os.path.isdir(name):
+ # find all python files in directory
+ list = []
+ os.path.walk(name, _visit_pyfiles, list)
+ return list
+ elif os.path.exists(name):
+ # a single file
+ return [name]
+
+ return []
class TokenEater:
@@ -314,7 +423,12 @@ class TokenEater:
self.__state = self.__waiting
elif ttype == tokenize.STRING:
self.__data.append(safe_eval(tstring))
- # TBD: should we warn if we seen anything else?
+ elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
+ token.NEWLINE, tokenize.NL]:
+ # warn if we see anything else than STRING or whitespace
+ print >>sys.stderr, _('*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"') % {
+ 'token': tstring, 'file': self.__curfile, 'lineno': self.__lineno}
+ self.__state = self.__waiting
def __addentry(self, msg, lineno=None, isdocstring=0):
if lineno is None:
@@ -495,6 +609,15 @@ def main():
else:
options.toexclude = []
+ # resolve args to module lists
+ expanded = []
+ for arg in args:
+ if arg == '-':
+ expanded.append(arg)
+ else:
+ expanded.extend(getFilesForName(arg))
+ args = expanded
+
# slurp through all the files
eater = TokenEater(options)
for filename in args:
@@ -539,3 +662,6 @@ if __name__ == '__main__':
main()
# some more test strings
_(u'a unicode string')
+ _('*** Seen unexpected token "%(token)s"' % {'token': 'test'}) # this one creates a warning
+ _('more' 'than' 'one' 'string')
+