1 files changed, 122 insertions, 90 deletions
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index b1d281d..ddd750e 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -1,4 +1,4 @@
-#! /usr/bin/env python3
+#! /usr/bin/env python
 # -*- coding: iso-8859-1 -*-
 # Originally written by Barry Warsaw <barry@python.org>
 #
@@ -156,14 +156,14 @@ If `inputfile' is -, standard input is read.
 """)
 
 import os
-import importlib.machinery
-import importlib.util
+import imp
 import sys
 import glob
 import time
 import getopt
 import token
 import tokenize
+import operator
 
 __version__ = '1.5'
 
@@ -189,51 +189,49 @@ msgstr ""
 "Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
 "Language-Team: LANGUAGE <LL@li.org>\\n"
 "MIME-Version: 1.0\\n"
-"Content-Type: text/plain; charset=%(charset)s\\n"
-"Content-Transfer-Encoding: %(encoding)s\\n"
+"Content-Type: text/plain; charset=CHARSET\\n"
+"Content-Transfer-Encoding: ENCODING\\n"
 "Generated-By: pygettext.py %(version)s\\n"
 
 ''')
 
 
 def usage(code, msg=''):
-    print(__doc__ % globals(), file=sys.stderr)
+    print >> sys.stderr, __doc__ % globals()
     if msg:
-        print(msg, file=sys.stderr)
+        print >> sys.stderr, msg
     sys.exit(code)
 
 
 
-def make_escapes(pass_nonascii):
-    global escapes, escape
-    if pass_nonascii:
-        # Allow non-ascii characters to pass through so that e.g. 'msgid
+escapes = []
+
+def make_escapes(pass_iso8859):
+    global escapes
+    escapes = [chr(i) for i in range(256)]
+    if pass_iso8859:
+        # Allow iso-8859 characters to pass through so that e.g. 'msgid
         # "Höhe"' would result not result in 'msgid "H\366he"'.  Otherwise we
         # escape any character outside the 32..126 range.
         mod = 128
-        escape = escape_ascii
     else:
         mod = 256
-        escape = escape_nonascii
-    escapes = [r"\%03o" % i for i in range(mod)]
-    for i in range(32, 127):
-        escapes[i] = chr(i)
-    escapes[ord('\\')] = r'\\'
-    escapes[ord('\t')] = r'\t'
-    escapes[ord('\r')] = r'\r'
-    escapes[ord('\n')] = r'\n'
-    escapes[ord('\"')] = r'\"'
-
-
-def escape_ascii(s, encoding):
-    return ''.join(escapes[ord(c)] if ord(c) < 128 else c for c in s)
+    for i in range(mod):
+        if not(32 <= i <= 126):
+            escapes[i] = "\\%03o" % i
+    escapes[ord('\\')] = '\\\\'
+    escapes[ord('\t')] = '\\t'
+    escapes[ord('\r')] = '\\r'
+    escapes[ord('\n')] = '\\n'
+    escapes[ord('\"')] = '\\"'
 
-def escape_nonascii(s, encoding):
-    return ''.join(escapes[b] for b in s.encode(encoding))
 
-
-def is_literal_string(s):
-    return s[0] in '\'"' or (s[0] in 'rRuU' and s[1] in '\'"')
+def escape(s):
+    global escapes
+    s = list(s)
+    for i in range(len(s)):
+        s[i] = escapes[ord(s[i])]
+    return EMPTYSTRING.join(s)
 
 
 def safe_eval(s):
@@ -241,18 +239,18 @@ def safe_eval(s):
     return eval(s, {'__builtins__':{}}, {})
 
 
-def normalize(s, encoding):
+def normalize(s):
     # This converts the various Python string types into a format that is
     # appropriate for .po files, namely much closer to C style.
     lines = s.split('\n')
     if len(lines) == 1:
-        s = '"' + escape(s, encoding) + '"'
+        s = '"' + escape(s) + '"'
     else:
         if not lines[-1]:
             del lines[-1]
             lines[-1] = lines[-1] + '\n'
         for i in range(len(lines)):
-            lines[i] = escape(lines[i], encoding)
+            lines[i] = escape(lines[i])
         lineterm = '\\n"\n"'
         s = '""\n"' + lineterm.join(lines) + '"'
     return s
@@ -263,6 +261,45 @@ def containsAny(str, set):
     return 1 in [c in str for c in set]
 
 
+def _get_modpkg_path(dotted_name, pathlist=None):
+    """Get the filesystem path for a module or a package.
+
+    Return the file system path to a file for a module, and to a directory for
+    a package. Return None if the name is not found, or is a builtin or
+    extension module.
+    """
+    # split off top-most name
+    parts = dotted_name.split('.', 1)
+
+    if len(parts) > 1:
+        # we have a dotted path, import top-level package
+        try:
+            file, pathname, description = imp.find_module(parts[0], pathlist)
+            if file: file.close()
+        except ImportError:
+            return None
+
+        # check if it's indeed a package
+        if description[2] == imp.PKG_DIRECTORY:
+            # recursively handle the remaining name parts
+            pathname = _get_modpkg_path(parts[1], [pathname])
+        else:
+            pathname = None
+    else:
+        # plain name
+        try:
+            file, pathname, description = imp.find_module(
+                dotted_name, pathlist)
+            if file:
+                file.close()
+            if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
+                pathname = None
+        except ImportError:
+            pathname = None
+
+    return pathname
+
+
 def getFilesForName(name):
     """Get a list of module files for a filename, a module or package name,
     or a directory.
@@ -277,11 +314,7 @@ def getFilesForName(name):
             return list
 
         # try to find module or package
-        try:
-            spec = importlib.util.find_spec(name)
-            name = spec.origin
-        except ImportError:
-            name = None
+        name = _get_modpkg_path(name)
         if not name:
             return []
 
@@ -289,7 +322,10 @@ def getFilesForName(name):
         # find all python files in directory
         list = []
         # get extension for python source files
-        _py_ext = importlib.machinery.SOURCE_SUFFIXES[0]
+        if '_py_ext' not in globals():
+            global _py_ext
+            _py_ext = [triple[0] for triple in imp.get_suffixes()
+                       if triple[2] == imp.PY_SOURCE][0]
         for root, dirs, files in os.walk(name):
             # don't recurse into CVS directories
             if 'CVS' in dirs:
@@ -316,13 +352,12 @@ class TokenEater:
         self.__lineno = -1
         self.__freshmodule = 1
         self.__curfile = None
-        self.__enclosurecount = 0
 
     def __call__(self, ttype, tstring, stup, etup, line):
         # dispatch
 ##        import token
-##        print('ttype:', token.tok_name[ttype], 'tstring:', tstring,
-##              file=sys.stderr)
+##        print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
+##              'tstring:', tstring
         self.__state(ttype, tstring, stup[0])
 
     def __waiting(self, ttype, tstring, lineno):
@@ -331,13 +366,13 @@ class TokenEater:
         if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
             # module docstring?
             if self.__freshmodule:
-                if ttype == tokenize.STRING and is_literal_string(tstring):
+                if ttype == tokenize.STRING:
                     self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                     self.__freshmodule = 0
                 elif ttype not in (tokenize.COMMENT, tokenize.NL):
                     self.__freshmodule = 0
                 return
-            # class or func/method docstring?
+            # class or function/method docstring?
             if ttype == tokenize.NAME and tstring in ('class', 'def'):
                 self.__state = self.__suiteseen
                 return
@@ -345,19 +380,13 @@ class TokenEater:
             self.__state = self.__keywordseen
 
     def __suiteseen(self, ttype, tstring, lineno):
-        # skip over any enclosure pairs until we see the colon
-        if ttype == tokenize.OP:
-            if tstring == ':' and self.__enclosurecount == 0:
-                # we see a colon and we're not in an enclosure: end of def
-                self.__state = self.__suitedocstring
-            elif tstring in '([{':
-                self.__enclosurecount += 1
-            elif tstring in ')]}':
-                self.__enclosurecount -= 1
+        # ignore anything until we see the colon
+        if ttype == tokenize.OP and tstring == ':':
+            self.__state = self.__suitedocstring
 
     def __suitedocstring(self, ttype, tstring, lineno):
         # ignore any intervening noise
-        if ttype == tokenize.STRING and is_literal_string(tstring):
+        if ttype == tokenize.STRING:
             self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
             self.__state = self.__waiting
         elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
@@ -382,18 +411,18 @@ class TokenEater:
             if self.__data:
                 self.__addentry(EMPTYSTRING.join(self.__data))
             self.__state = self.__waiting
-        elif ttype == tokenize.STRING and is_literal_string(tstring):
+        elif ttype == tokenize.STRING:
             self.__data.append(safe_eval(tstring))
         elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                            token.NEWLINE, tokenize.NL]:
             # warn if we see anything else than STRING or whitespace
-            print(_(
+            print >> sys.stderr, _(
                 '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                 ) % {
                 'token': tstring,
                 'file': self.__curfile,
                 'lineno': self.__lineno
-                }, file=sys.stderr)
+                }
             self.__state = self.__waiting
 
     def __addentry(self, msg, lineno=None, isdocstring=0):
@@ -409,38 +438,42 @@ class TokenEater:
 
     def write(self, fp):
         options = self.__options
-        timestamp = time.strftime('%Y-%m-%d %H:%M%z')
-        encoding = fp.encoding if fp.encoding else 'UTF-8'
-        print(pot_header % {'time': timestamp, 'version': __version__,
-                            'charset': encoding,
-                            'encoding': '8bit'}, file=fp)
+        timestamp = time.strftime('%Y-%m-%d %H:%M+%Z')
+        # NOTE: '+%Z' appends the time zone *name* (e.g. "+CET"), whereas
+        # xgettext writes a numeric UTC offset (e.g. "+0100") in the header.
+        print >> fp, pot_header % {'time': timestamp, 'version': __version__}
         # Sort the entries.  First sort each particular entry's keys, then
         # sort all the entries by their first item.
         reverse = {}
         for k, v in self.__messages.items():
-            keys = sorted(v.keys())
+            keys = v.keys()
+            keys.sort()
             reverse.setdefault(tuple(keys), []).append((k, v))
-        rkeys = sorted(reverse.keys())
+        rkeys = reverse.keys()
+        rkeys.sort()
         for rkey in rkeys:
             rentries = reverse[rkey]
             rentries.sort()
             for k, v in rentries:
+                isdocstring = 0
                 # If the entry was gleaned out of a docstring, then add a
                 # comment stating so.  This is to aid translators who may wish
                 # to skip translating some unimportant docstrings.
-                isdocstring = any(v.values())
+                if reduce(operator.__add__, v.values()):
+                    isdocstring = 1
                 # k is the message string, v is a dictionary-set of (filename,
                 # lineno) tuples.  We want to sort the entries in v first by
                 # file name and then by line number.
-                v = sorted(v.keys())
+                v = v.keys()
+                v.sort()
                 if not options.writelocations:
                     pass
                 # location comments are different b/w Solaris and GNU:
                 elif options.locationstyle == options.SOLARIS:
                     for filename, lineno in v:
                         d = {'filename': filename, 'lineno': lineno}
-                        print(_(
-                            '# File: %(filename)s, line: %(lineno)d') % d, file=fp)
+                        print >>fp, _(
+                            '# File: %(filename)s, line: %(lineno)d') % d
                 elif options.locationstyle == options.GNU:
                     # fit as many locations on one line, as long as the
                     # resulting line length doesn't exceed 'options.width'
@@ -451,14 +484,14 @@ class TokenEater:
                         if len(locline) + len(s) <= options.width:
                             locline = locline + s
                         else:
-                            print(locline, file=fp)
+                            print >> fp, locline
                             locline = "#:" + s
                     if len(locline) > 2:
-                        print(locline, file=fp)
+                        print >> fp, locline
                 if isdocstring:
-                    print('#, docstring', file=fp)
-                print('msgid', normalize(k, encoding), file=fp)
-                print('msgstr ""\n', file=fp)
+                    print >> fp, '#, docstring'
+                print >> fp, 'msgid', normalize(k)
+                print >> fp, 'msgstr ""\n'
 
 
 
@@ -474,7 +507,7 @@ def main():
              'style=', 'verbose', 'version', 'width=', 'exclude-file=',
              'docstrings', 'no-docstrings',
              ])
-    except getopt.error as msg:
+    except getopt.error, msg:
         usage(1, msg)
 
     # for holding option values
@@ -532,7 +565,7 @@ def main():
         elif opt in ('-v', '--verbose'):
             options.verbose = 1
         elif opt in ('-V', '--version'):
-            print(_('pygettext.py (xgettext for Python) %s') % __version__)
+            print _('pygettext.py (xgettext for Python) %s') % __version__
             sys.exit(0)
         elif opt in ('-w', '--width'):
             try:
@@ -561,11 +594,12 @@ def main():
     # initialize list of strings to exclude
     if options.excludefilename:
         try:
-            with open(options.excludefilename) as fp:
-                options.toexclude = fp.readlines()
+            fp = open(options.excludefilename)
+            options.toexclude = fp.readlines()
+            fp.close()
         except IOError:
-            print(_(
-                "Can't read --exclude-file: %s") % options.excludefilename, file=sys.stderr)
+            print >> sys.stderr, _(
+                "Can't read --exclude-file: %s") % options.excludefilename
             sys.exit(1)
     else:
         options.toexclude = []
@@ -584,24 +618,21 @@ def main():
     for filename in args:
         if filename == '-':
             if options.verbose:
-                print(_('Reading standard input'))
-            fp = sys.stdin.buffer
+                print _('Reading standard input')
+            fp = sys.stdin
             closep = 0
         else:
             if options.verbose:
-                print(_('Working on %s') % filename)
-            fp = open(filename, 'rb')
+                print _('Working on %s') % filename
+            fp = open(filename)
             closep = 1
         try:
             eater.set_filename(filename)
             try:
-                tokens = tokenize.tokenize(fp.readline)
-                for _token in tokens:
-                    eater(*_token)
-            except tokenize.TokenError as e:
-                print('%s: %s, line %d, column %d' % (
-                    e.args[0], filename, e.args[1][0], e.args[1][1]),
-                    file=sys.stderr)
+                tokenize.tokenize(fp.readline, eater)
+            except tokenize.TokenError, e:
+                print >> sys.stderr, '%s: %s, line %d, column %d' % (
+                    e[0], filename, e[1][0], e[1][1])
         finally:
             if closep:
                 fp.close()
@@ -625,6 +656,7 @@ def main():
 if __name__ == '__main__':
     main()
     # some more test strings
+    _(u'a unicode string')
     # this one creates a warning
     _('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
     _('more' 'than' 'one' 'string')