diff options
Diffstat (limited to 'Tools/i18n/pygettext.py')
-rwxr-xr-x | Tools/i18n/pygettext.py | 212 |
1 files changed, 122 insertions, 90 deletions
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index b1d281d..ddd750e 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -1,4 +1,4 @@ -#! /usr/bin/env python3 +#! /usr/bin/env python # -*- coding: iso-8859-1 -*- # Originally written by Barry Warsaw <barry@python.org> # @@ -156,14 +156,14 @@ If `inputfile' is -, standard input is read. """) import os -import importlib.machinery -import importlib.util +import imp import sys import glob import time import getopt import token import tokenize +import operator __version__ = '1.5' @@ -189,51 +189,49 @@ msgstr "" "Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n" "Language-Team: LANGUAGE <LL@li.org>\\n" "MIME-Version: 1.0\\n" -"Content-Type: text/plain; charset=%(charset)s\\n" -"Content-Transfer-Encoding: %(encoding)s\\n" +"Content-Type: text/plain; charset=CHARSET\\n" +"Content-Transfer-Encoding: ENCODING\\n" "Generated-By: pygettext.py %(version)s\\n" ''') def usage(code, msg=''): - print(__doc__ % globals(), file=sys.stderr) + print >> sys.stderr, __doc__ % globals() if msg: - print(msg, file=sys.stderr) + print >> sys.stderr, msg sys.exit(code) -def make_escapes(pass_nonascii): - global escapes, escape - if pass_nonascii: - # Allow non-ascii characters to pass through so that e.g. 'msgid +escapes = [] + +def make_escapes(pass_iso8859): + global escapes + escapes = [chr(i) for i in range(256)] + if pass_iso8859: + # Allow iso-8859 characters to pass through so that e.g. 'msgid # "Höhe"' would result not result in 'msgid "H\366he"'. Otherwise we # escape any character outside the 32..126 range. mod = 128 - escape = escape_ascii else: mod = 256 - escape = escape_nonascii - escapes = [r"\%03o" % i for i in range(mod)] - for i in range(32, 127): - escapes[i] = chr(i) - escapes[ord('\\')] = r'\\' - escapes[ord('\t')] = r'\t' - escapes[ord('\r')] = r'\r' - escapes[ord('\n')] = r'\n' - escapes[ord('\"')] = r'\"' - - -def escape_ascii(s, encoding): - return ''.join(escapes[ord(c)] if ord(c) < 128 else c for c in s) + for i in range(mod): + if not(32 <= i <= 126): + escapes[i] = "\\%03o" % i + escapes[ord('\\')] = '\\\\' + escapes[ord('\t')] = '\\t' + escapes[ord('\r')] = '\\r' + escapes[ord('\n')] = '\\n' + escapes[ord('\"')] = '\\"' -def escape_nonascii(s, encoding): - return ''.join(escapes[b] for b in s.encode(encoding)) - -def is_literal_string(s): - return s[0] in '\'"' or (s[0] in 'rRuU' and s[1] in '\'"') +def escape(s): + global escapes + s = list(s) + for i in range(len(s)): + s[i] = escapes[ord(s[i])] + return EMPTYSTRING.join(s) def safe_eval(s): @@ -241,18 +239,18 @@ def safe_eval(s): return eval(s, {'__builtins__':{}}, {}) -def normalize(s, encoding): +def normalize(s): # This converts the various Python string types into a format that is # appropriate for .po files, namely much closer to C style. lines = s.split('\n') if len(lines) == 1: - s = '"' + escape(s, encoding) + '"' + s = '"' + escape(s) + '"' else: if not lines[-1]: del lines[-1] lines[-1] = lines[-1] + '\n' for i in range(len(lines)): - lines[i] = escape(lines[i], encoding) + lines[i] = escape(lines[i]) lineterm = '\\n"\n"' s = '""\n"' + lineterm.join(lines) + '"' return s @@ -263,6 +261,45 @@ def containsAny(str, set): return 1 in [c in str for c in set] +def _get_modpkg_path(dotted_name, pathlist=None): + """Get the filesystem path for a module or a package. + + Return the file system path to a file for a module, and to a directory for + a package. Return None if the name is not found, or is a builtin or + extension module. + """ + # split off top-most name + parts = dotted_name.split('.', 1) + + if len(parts) > 1: + # we have a dotted path, import top-level package + try: + file, pathname, description = imp.find_module(parts[0], pathlist) + if file: file.close() + except ImportError: + return None + + # check if it's indeed a package + if description[2] == imp.PKG_DIRECTORY: + # recursively handle the remaining name parts + pathname = _get_modpkg_path(parts[1], [pathname]) + else: + pathname = None + else: + # plain name + try: + file, pathname, description = imp.find_module( + dotted_name, pathlist) + if file: + file.close() + if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]: + pathname = None + except ImportError: + pathname = None + + return pathname + + def getFilesForName(name): """Get a list of module files for a filename, a module or package name, or a directory. @@ -277,11 +314,7 @@ def getFilesForName(name): return list # try to find module or package - try: - spec = importlib.util.find_spec(name) - name = spec.origin - except ImportError: - name = None + name = _get_modpkg_path(name) if not name: return [] @@ -289,7 +322,10 @@ def getFilesForName(name): # find all python files in directory list = [] # get extension for python source files - _py_ext = importlib.machinery.SOURCE_SUFFIXES[0] + if '_py_ext' not in globals(): + global _py_ext + _py_ext = [triple[0] for triple in imp.get_suffixes() + if triple[2] == imp.PY_SOURCE][0] for root, dirs, files in os.walk(name): # don't recurse into CVS directories if 'CVS' in dirs: @@ -316,13 +352,12 @@ class TokenEater: self.__lineno = -1 self.__freshmodule = 1 self.__curfile = None - self.__enclosurecount = 0 def __call__(self, ttype, tstring, stup, etup, line): # dispatch ## import token -## print('ttype:', token.tok_name[ttype], 'tstring:', tstring, -## file=sys.stderr) +## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \ +## 'tstring:', tstring self.__state(ttype, tstring, stup[0]) def __waiting(self, ttype, tstring, lineno): @@ -331,13 +366,13 @@ class TokenEater: if opts.docstrings and not opts.nodocstrings.get(self.__curfile): # module docstring? if self.__freshmodule: - if ttype == tokenize.STRING and is_literal_string(tstring): + if ttype == tokenize.STRING: self.__addentry(safe_eval(tstring), lineno, isdocstring=1) self.__freshmodule = 0 elif ttype not in (tokenize.COMMENT, tokenize.NL): self.__freshmodule = 0 return - # class or func/method docstring? + # class docstring? if ttype == tokenize.NAME and tstring in ('class', 'def'): self.__state = self.__suiteseen return @@ -345,19 +380,13 @@ class TokenEater: self.__state = self.__keywordseen def __suiteseen(self, ttype, tstring, lineno): - # skip over any enclosure pairs until we see the colon - if ttype == tokenize.OP: - if tstring == ':' and self.__enclosurecount == 0: - # we see a colon and we're not in an enclosure: end of def - self.__state = self.__suitedocstring - elif tstring in '([{': - self.__enclosurecount += 1 - elif tstring in ')]}': - self.__enclosurecount -= 1 + # ignore anything until we see the colon + if ttype == tokenize.OP and tstring == ':': + self.__state = self.__suitedocstring def __suitedocstring(self, ttype, tstring, lineno): # ignore any intervening noise - if ttype == tokenize.STRING and is_literal_string(tstring): + if ttype == tokenize.STRING: self.__addentry(safe_eval(tstring), lineno, isdocstring=1) self.__state = self.__waiting elif ttype not in (tokenize.NEWLINE, tokenize.INDENT, @@ -382,18 +411,18 @@ class TokenEater: if self.__data: self.__addentry(EMPTYSTRING.join(self.__data)) self.__state = self.__waiting - elif ttype == tokenize.STRING and is_literal_string(tstring): + elif ttype == tokenize.STRING: self.__data.append(safe_eval(tstring)) elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL]: # warn if we see anything else than STRING or whitespace - print(_( + print >> sys.stderr, _( '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"' ) % { 'token': tstring, 'file': self.__curfile, 'lineno': self.__lineno - }, file=sys.stderr) + } self.__state = self.__waiting def __addentry(self, msg, lineno=None, isdocstring=0): @@ -409,38 +438,42 @@ class TokenEater: def write(self, fp): options = self.__options - timestamp = time.strftime('%Y-%m-%d %H:%M%z') - encoding = fp.encoding if fp.encoding else 'UTF-8' - print(pot_header % {'time': timestamp, 'version': __version__, - 'charset': encoding, - 'encoding': '8bit'}, file=fp) + timestamp = time.strftime('%Y-%m-%d %H:%M+%Z') + # The time stamp in the header doesn't have the same format as that + # generated by xgettext... + print >> fp, pot_header % {'time': timestamp, 'version': __version__} # Sort the entries. First sort each particular entry's keys, then # sort all the entries by their first item. reverse = {} for k, v in self.__messages.items(): - keys = sorted(v.keys()) + keys = v.keys() + keys.sort() reverse.setdefault(tuple(keys), []).append((k, v)) - rkeys = sorted(reverse.keys()) + rkeys = reverse.keys() + rkeys.sort() for rkey in rkeys: rentries = reverse[rkey] rentries.sort() for k, v in rentries: + isdocstring = 0 # If the entry was gleaned out of a docstring, then add a # comment stating so. This is to aid translators who may wish # to skip translating some unimportant docstrings. - isdocstring = any(v.values()) + if reduce(operator.__add__, v.values()): + isdocstring = 1 # k is the message string, v is a dictionary-set of (filename, # lineno) tuples. We want to sort the entries in v first by # file name and then by line number. - v = sorted(v.keys()) + v = v.keys() + v.sort() if not options.writelocations: pass # location comments are different b/w Solaris and GNU: elif options.locationstyle == options.SOLARIS: for filename, lineno in v: d = {'filename': filename, 'lineno': lineno} - print(_( - '# File: %(filename)s, line: %(lineno)d') % d, file=fp) + print >>fp, _( + '# File: %(filename)s, line: %(lineno)d') % d elif options.locationstyle == options.GNU: # fit as many locations on one line, as long as the # resulting line length doesn't exceed 'options.width' @@ -451,14 +484,14 @@ class TokenEater: if len(locline) + len(s) <= options.width: locline = locline + s else: - print(locline, file=fp) + print >> fp, locline locline = "#:" + s if len(locline) > 2: - print(locline, file=fp) + print >> fp, locline if isdocstring: - print('#, docstring', file=fp) - print('msgid', normalize(k, encoding), file=fp) - print('msgstr ""\n', file=fp) + print >> fp, '#, docstring' + print >> fp, 'msgid', normalize(k) + print >> fp, 'msgstr ""\n' @@ -474,7 +507,7 @@ def main(): 'style=', 'verbose', 'version', 'width=', 'exclude-file=', 'docstrings', 'no-docstrings', ]) - except getopt.error as msg: + except getopt.error, msg: usage(1, msg) # for holding option values @@ -532,7 +565,7 @@ def main(): elif opt in ('-v', '--verbose'): options.verbose = 1 elif opt in ('-V', '--version'): - print(_('pygettext.py (xgettext for Python) %s') % __version__) + print _('pygettext.py (xgettext for Python) %s') % __version__ sys.exit(0) elif opt in ('-w', '--width'): try: @@ -561,11 +594,12 @@ def main(): # initialize list of strings to exclude if options.excludefilename: try: - with open(options.excludefilename) as fp: - options.toexclude = fp.readlines() + fp = open(options.excludefilename) + options.toexclude = fp.readlines() + fp.close() except IOError: - print(_( - "Can't read --exclude-file: %s") % options.excludefilename, file=sys.stderr) + print >> sys.stderr, _( + "Can't read --exclude-file: %s") % options.excludefilename sys.exit(1) else: options.toexclude = [] @@ -584,24 +618,21 @@ def main(): for filename in args: if filename == '-': if options.verbose: - print(_('Reading standard input')) - fp = sys.stdin.buffer + print _('Reading standard input') + fp = sys.stdin closep = 0 else: if options.verbose: - print(_('Working on %s') % filename) - fp = open(filename, 'rb') + print _('Working on %s') % filename + fp = open(filename) closep = 1 try: eater.set_filename(filename) try: - tokens = tokenize.tokenize(fp.readline) - for _token in tokens: - eater(*_token) - except tokenize.TokenError as e: - print('%s: %s, line %d, column %d' % ( - e.args[0], filename, e.args[1][0], e.args[1][1]), - file=sys.stderr) + tokenize.tokenize(fp.readline, eater) + except tokenize.TokenError, e: + print >> sys.stderr, '%s: %s, line %d, column %d' % ( + e[0], filename, e[1][0], e[1][1]) finally: if closep: fp.close() @@ -625,6 +656,7 @@ def main(): if __name__ == '__main__': main() # some more test strings + _(u'a unicode string') # this one creates a warning _('*** Seen unexpected token "%(token)s"') % {'token': 'test'} _('more' 'than' 'one' 'string') |