bpo-33189: pygettext.py now accepts only literal strings as docstrings and translatable strings (GH-6364)

Bytes literals and f-string expressions are now rejected.
author: Serhiy Storchaka <storchaka@gmail.com> 2018-04-19 06:23:03 (GMT)
committer: GitHub <noreply@github.com> 2018-04-19 06:23:03 (GMT)
commit: 69524821a87251b7aee966f6e46b3810ff5aaa64 (patch)
tree: c846e23df670d6fa22ecba626d2280efd222cc86 /Tools
parent: b7e1eff8436f6e0c4aac440036092fcf96f82960 (diff)
download: cpython-69524821a87251b7aee966f6e46b3810ff5aaa64.zip
cpython-69524821a87251b7aee966f6e46b3810ff5aaa64.tar.gz
cpython-69524821a87251b7aee966f6e46b3810ff5aaa64.tar.bz2
1 file changed, 9 insertions, 5 deletions
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index 13d7a64..b46dd33 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -232,6 +232,10 @@ def escape_nonascii(s, encoding):
     return ''.join(escapes[b] for b in s.encode(encoding))
 
 
+def is_literal_string(s):
+    return s[0] in '\'"' or (s[0] in 'rRuU' and s[1] in '\'"')
+
+
 def safe_eval(s):
     # unwrap quotes, safely
     return eval(s, {'__builtins__':{}}, {})
@@ -317,8 +321,8 @@ class TokenEater:
     def __call__(self, ttype, tstring, stup, etup, line):
         # dispatch
 ##        import token
-##        print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
-##              'tstring:', tstring
+##        print('ttype:', token.tok_name[ttype], 'tstring:', tstring,
+##              file=sys.stderr)
         self.__state(ttype, tstring, stup[0])
 
     def __waiting(self, ttype, tstring, lineno):
@@ -327,7 +331,7 @@ class TokenEater:
         if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
             # module docstring?
             if self.__freshmodule:
-                if ttype == tokenize.STRING:
+                if ttype == tokenize.STRING and is_literal_string(tstring):
                     self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                     self.__freshmodule = 0
                 elif ttype not in (tokenize.COMMENT, tokenize.NL):
@@ -353,7 +357,7 @@ class TokenEater:
 
     def __suitedocstring(self, ttype, tstring, lineno):
         # ignore any intervening noise
-        if ttype == tokenize.STRING:
+        if ttype == tokenize.STRING and is_literal_string(tstring):
             self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
             self.__state = self.__waiting
         elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
@@ -378,7 +382,7 @@ class TokenEater:
             if self.__data:
                 self.__addentry(EMPTYSTRING.join(self.__data))
             self.__state = self.__waiting
-        elif ttype == tokenize.STRING:
+        elif ttype == tokenize.STRING and is_literal_string(tstring):
             self.__data.append(safe_eval(tstring))
         elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                            token.NEWLINE, tokenize.NL]:
author	Serhiy Storchaka <storchaka@gmail.com>	2018-04-19 06:23:03 (GMT)
committer	GitHub <noreply@github.com>	2018-04-19 06:23:03 (GMT)
commit	69524821a87251b7aee966f6e46b3810ff5aaa64 (patch)
tree	c846e23df670d6fa22ecba626d2280efd222cc86 /Tools
parent	b7e1eff8436f6e0c4aac440036092fcf96f82960 (diff)
download	cpython-69524821a87251b7aee966f6e46b3810ff5aaa64.zip cpython-69524821a87251b7aee966f6e46b3810ff5aaa64.tar.gz cpython-69524821a87251b7aee966f6e46b3810ff5aaa64.tar.bz2