summary refs log tree commit diff stats
path: root/Tools/i18n/pygettext.py
diff options
context:
space:
mode:
Diffstat (limited to 'Tools/i18n/pygettext.py')
-rwxr-xr-x Tools/i18n/pygettext.py 212
1 files changed, 122 insertions, 90 deletions
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index b1d281d..ddd750e 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -1,4 +1,4 @@
-#! /usr/bin/env python3
+#! /usr/bin/env python
# -*- coding: iso-8859-1 -*-
# Originally written by Barry Warsaw <barry@python.org>
#
@@ -156,14 +156,14 @@ If `inputfile' is -, standard input is read.
""")
import os
-import importlib.machinery
-import importlib.util
+import imp
import sys
import glob
import time
import getopt
import token
import tokenize
+import operator
__version__ = '1.5'
@@ -189,51 +189,49 @@ msgstr ""
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
-"Content-Type: text/plain; charset=%(charset)s\\n"
-"Content-Transfer-Encoding: %(encoding)s\\n"
+"Content-Type: text/plain; charset=CHARSET\\n"
+"Content-Transfer-Encoding: ENCODING\\n"
"Generated-By: pygettext.py %(version)s\\n"
''')
def usage(code, msg=''):
- print(__doc__ % globals(), file=sys.stderr)
+ print >> sys.stderr, __doc__ % globals()
if msg:
- print(msg, file=sys.stderr)
+ print >> sys.stderr, msg
sys.exit(code)
-def make_escapes(pass_nonascii):
- global escapes, escape
- if pass_nonascii:
- # Allow non-ascii characters to pass through so that e.g. 'msgid
+escapes = []
+
+def make_escapes(pass_iso8859):
+ global escapes
+ escapes = [chr(i) for i in range(256)]
+ if pass_iso8859:
+ # Allow iso-8859 characters to pass through so that e.g. 'msgid
# "Höhe"' would not result in 'msgid "H\366he"'. Otherwise we
# escape any character outside the 32..126 range.
mod = 128
- escape = escape_ascii
else:
mod = 256
- escape = escape_nonascii
- escapes = [r"\%03o" % i for i in range(mod)]
- for i in range(32, 127):
- escapes[i] = chr(i)
- escapes[ord('\\')] = r'\\'
- escapes[ord('\t')] = r'\t'
- escapes[ord('\r')] = r'\r'
- escapes[ord('\n')] = r'\n'
- escapes[ord('\"')] = r'\"'
-
-
-def escape_ascii(s, encoding):
- return ''.join(escapes[ord(c)] if ord(c) < 128 else c for c in s)
+ for i in range(mod):
+ if not(32 <= i <= 126):
+ escapes[i] = "\\%03o" % i
+ escapes[ord('\\')] = '\\\\'
+ escapes[ord('\t')] = '\\t'
+ escapes[ord('\r')] = '\\r'
+ escapes[ord('\n')] = '\\n'
+ escapes[ord('\"')] = '\\"'
-def escape_nonascii(s, encoding):
- return ''.join(escapes[b] for b in s.encode(encoding))
-
-def is_literal_string(s):
- return s[0] in '\'"' or (s[0] in 'rRuU' and s[1] in '\'"')
+def escape(s):
+ global escapes
+ s = list(s)
+ for i in range(len(s)):
+ s[i] = escapes[ord(s[i])]
+ return EMPTYSTRING.join(s)
def safe_eval(s):
@@ -241,18 +239,18 @@ def safe_eval(s):
return eval(s, {'__builtins__':{}}, {})
-def normalize(s, encoding):
+def normalize(s):
# This converts the various Python string types into a format that is
# appropriate for .po files, namely much closer to C style.
lines = s.split('\n')
if len(lines) == 1:
- s = '"' + escape(s, encoding) + '"'
+ s = '"' + escape(s) + '"'
else:
if not lines[-1]:
del lines[-1]
lines[-1] = lines[-1] + '\n'
for i in range(len(lines)):
- lines[i] = escape(lines[i], encoding)
+ lines[i] = escape(lines[i])
lineterm = '\\n"\n"'
s = '""\n"' + lineterm.join(lines) + '"'
return s
@@ -263,6 +261,45 @@ def containsAny(str, set):
return 1 in [c in str for c in set]
+def _get_modpkg_path(dotted_name, pathlist=None):
+ """Get the filesystem path for a module or a package.
+
+ Return the file system path to a file for a module, and to a directory for
+ a package. Return None if the name is not found, or is a builtin or
+ extension module.
+ """
+ # split off top-most name
+ parts = dotted_name.split('.', 1)
+
+ if len(parts) > 1:
+ # we have a dotted path, import top-level package
+ try:
+ file, pathname, description = imp.find_module(parts[0], pathlist)
+ if file: file.close()
+ except ImportError:
+ return None
+
+ # check if it's indeed a package
+ if description[2] == imp.PKG_DIRECTORY:
+ # recursively handle the remaining name parts
+ pathname = _get_modpkg_path(parts[1], [pathname])
+ else:
+ pathname = None
+ else:
+ # plain name
+ try:
+ file, pathname, description = imp.find_module(
+ dotted_name, pathlist)
+ if file:
+ file.close()
+ if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
+ pathname = None
+ except ImportError:
+ pathname = None
+
+ return pathname
+
+
def getFilesForName(name):
"""Get a list of module files for a filename, a module or package name,
or a directory.
@@ -277,11 +314,7 @@ def getFilesForName(name):
return list
# try to find module or package
- try:
- spec = importlib.util.find_spec(name)
- name = spec.origin
- except ImportError:
- name = None
+ name = _get_modpkg_path(name)
if not name:
return []
@@ -289,7 +322,10 @@ def getFilesForName(name):
# find all python files in directory
list = []
# get extension for python source files
- _py_ext = importlib.machinery.SOURCE_SUFFIXES[0]
+ if '_py_ext' not in globals():
+ global _py_ext
+ _py_ext = [triple[0] for triple in imp.get_suffixes()
+ if triple[2] == imp.PY_SOURCE][0]
for root, dirs, files in os.walk(name):
# don't recurse into CVS directories
if 'CVS' in dirs:
@@ -316,13 +352,12 @@ class TokenEater:
self.__lineno = -1
self.__freshmodule = 1
self.__curfile = None
- self.__enclosurecount = 0
def __call__(self, ttype, tstring, stup, etup, line):
# dispatch
## import token
-## print('ttype:', token.tok_name[ttype], 'tstring:', tstring,
-## file=sys.stderr)
+## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
+## 'tstring:', tstring
self.__state(ttype, tstring, stup[0])
def __waiting(self, ttype, tstring, lineno):
@@ -331,13 +366,13 @@ class TokenEater:
if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
# module docstring?
if self.__freshmodule:
- if ttype == tokenize.STRING and is_literal_string(tstring):
+ if ttype == tokenize.STRING:
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
self.__freshmodule = 0
elif ttype not in (tokenize.COMMENT, tokenize.NL):
self.__freshmodule = 0
return
- # class or func/method docstring?
+ # class docstring?
if ttype == tokenize.NAME and tstring in ('class', 'def'):
self.__state = self.__suiteseen
return
@@ -345,19 +380,13 @@ class TokenEater:
self.__state = self.__keywordseen
def __suiteseen(self, ttype, tstring, lineno):
- # skip over any enclosure pairs until we see the colon
- if ttype == tokenize.OP:
- if tstring == ':' and self.__enclosurecount == 0:
- # we see a colon and we're not in an enclosure: end of def
- self.__state = self.__suitedocstring
- elif tstring in '([{':
- self.__enclosurecount += 1
- elif tstring in ')]}':
- self.__enclosurecount -= 1
+ # ignore anything until we see the colon
+ if ttype == tokenize.OP and tstring == ':':
+ self.__state = self.__suitedocstring
def __suitedocstring(self, ttype, tstring, lineno):
# ignore any intervening noise
- if ttype == tokenize.STRING and is_literal_string(tstring):
+ if ttype == tokenize.STRING:
self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
self.__state = self.__waiting
elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
@@ -382,18 +411,18 @@ class TokenEater:
if self.__data:
self.__addentry(EMPTYSTRING.join(self.__data))
self.__state = self.__waiting
- elif ttype == tokenize.STRING and is_literal_string(tstring):
+ elif ttype == tokenize.STRING:
self.__data.append(safe_eval(tstring))
elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
token.NEWLINE, tokenize.NL]:
# warn if we see anything else than STRING or whitespace
- print(_(
+ print >> sys.stderr, _(
'*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
) % {
'token': tstring,
'file': self.__curfile,
'lineno': self.__lineno
- }, file=sys.stderr)
+ }
self.__state = self.__waiting
def __addentry(self, msg, lineno=None, isdocstring=0):
@@ -409,38 +438,42 @@ class TokenEater:
def write(self, fp):
options = self.__options
- timestamp = time.strftime('%Y-%m-%d %H:%M%z')
- encoding = fp.encoding if fp.encoding else 'UTF-8'
- print(pot_header % {'time': timestamp, 'version': __version__,
- 'charset': encoding,
- 'encoding': '8bit'}, file=fp)
+ timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
+ # The time stamp in the header doesn't have the same format as that
+ # generated by xgettext...
+ print >> fp, pot_header % {'time': timestamp, 'version': __version__}
# Sort the entries. First sort each particular entry's keys, then
# sort all the entries by their first item.
reverse = {}
for k, v in self.__messages.items():
- keys = sorted(v.keys())
+ keys = v.keys()
+ keys.sort()
reverse.setdefault(tuple(keys), []).append((k, v))
- rkeys = sorted(reverse.keys())
+ rkeys = reverse.keys()
+ rkeys.sort()
for rkey in rkeys:
rentries = reverse[rkey]
rentries.sort()
for k, v in rentries:
+ isdocstring = 0
# If the entry was gleaned out of a docstring, then add a
# comment stating so. This is to aid translators who may wish
# to skip translating some unimportant docstrings.
- isdocstring = any(v.values())
+ if reduce(operator.__add__, v.values()):
+ isdocstring = 1
# k is the message string, v is a dictionary-set of (filename,
# lineno) tuples. We want to sort the entries in v first by
# file name and then by line number.
- v = sorted(v.keys())
+ v = v.keys()
+ v.sort()
if not options.writelocations:
pass
# location comments are different b/w Solaris and GNU:
elif options.locationstyle == options.SOLARIS:
for filename, lineno in v:
d = {'filename': filename, 'lineno': lineno}
- print(_(
- '# File: %(filename)s, line: %(lineno)d') % d, file=fp)
+ print >>fp, _(
+ '# File: %(filename)s, line: %(lineno)d') % d
elif options.locationstyle == options.GNU:
# fit as many locations on one line, as long as the
# resulting line length doesn't exceed 'options.width'
@@ -451,14 +484,14 @@ class TokenEater:
if len(locline) + len(s) <= options.width:
locline = locline + s
else:
- print(locline, file=fp)
+ print >> fp, locline
locline = "#:" + s
if len(locline) > 2:
- print(locline, file=fp)
+ print >> fp, locline
if isdocstring:
- print('#, docstring', file=fp)
- print('msgid', normalize(k, encoding), file=fp)
- print('msgstr ""\n', file=fp)
+ print >> fp, '#, docstring'
+ print >> fp, 'msgid', normalize(k)
+ print >> fp, 'msgstr ""\n'
@@ -474,7 +507,7 @@ def main():
'style=', 'verbose', 'version', 'width=', 'exclude-file=',
'docstrings', 'no-docstrings',
])
- except getopt.error as msg:
+ except getopt.error, msg:
usage(1, msg)
# for holding option values
@@ -532,7 +565,7 @@ def main():
elif opt in ('-v', '--verbose'):
options.verbose = 1
elif opt in ('-V', '--version'):
- print(_('pygettext.py (xgettext for Python) %s') % __version__)
+ print _('pygettext.py (xgettext for Python) %s') % __version__
sys.exit(0)
elif opt in ('-w', '--width'):
try:
@@ -561,11 +594,12 @@ def main():
# initialize list of strings to exclude
if options.excludefilename:
try:
- with open(options.excludefilename) as fp:
- options.toexclude = fp.readlines()
+ fp = open(options.excludefilename)
+ options.toexclude = fp.readlines()
+ fp.close()
except IOError:
- print(_(
- "Can't read --exclude-file: %s") % options.excludefilename, file=sys.stderr)
+ print >> sys.stderr, _(
+ "Can't read --exclude-file: %s") % options.excludefilename
sys.exit(1)
else:
options.toexclude = []
@@ -584,24 +618,21 @@ def main():
for filename in args:
if filename == '-':
if options.verbose:
- print(_('Reading standard input'))
- fp = sys.stdin.buffer
+ print _('Reading standard input')
+ fp = sys.stdin
closep = 0
else:
if options.verbose:
- print(_('Working on %s') % filename)
- fp = open(filename, 'rb')
+ print _('Working on %s') % filename
+ fp = open(filename)
closep = 1
try:
eater.set_filename(filename)
try:
- tokens = tokenize.tokenize(fp.readline)
- for _token in tokens:
- eater(*_token)
- except tokenize.TokenError as e:
- print('%s: %s, line %d, column %d' % (
- e.args[0], filename, e.args[1][0], e.args[1][1]),
- file=sys.stderr)
+ tokenize.tokenize(fp.readline, eater)
+ except tokenize.TokenError, e:
+ print >> sys.stderr, '%s: %s, line %d, column %d' % (
+ e[0], filename, e[1][0], e[1][1])
finally:
if closep:
fp.close()
@@ -625,6 +656,7 @@ def main():
if __name__ == '__main__':
main()
# some more test strings
+ _(u'a unicode string')
# this one creates a warning
_('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
_('more' 'than' 'one' 'string')