diff options
author | Barry Warsaw <barry@python.org> | 2000-10-27 04:56:28 (GMT) |
---|---|---|
committer | Barry Warsaw <barry@python.org> | 2000-10-27 04:56:28 (GMT) |
commit | 08a8a355bebc4905f0bbd211bed4f53294e0d7b2 (patch) | |
tree | 39c1543cf1281dd099d2a9936d8bd39ced785520 | |
parent | 3aecfc96811c6d85632e50fe706bb5d02d376c00 (diff) | |
download | cpython-08a8a355bebc4905f0bbd211bed4f53294e0d7b2.zip cpython-08a8a355bebc4905f0bbd211bed4f53294e0d7b2.tar.gz cpython-08a8a355bebc4905f0bbd211bed4f53294e0d7b2.tar.bz2 |
Added the -D/--docstrings option for extraction of unmarked module,
class, method, and function docstrings.
-rwxr-xr-x | Tools/i18n/pygettext.py | 122 |
1 files changed, 82 insertions, 40 deletions
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py index f48ee78..834ba48 100755 --- a/Tools/i18n/pygettext.py +++ b/Tools/i18n/pygettext.py @@ -4,15 +4,7 @@ # minimally patched to make it even more xgettext compatible # by Peter Funk <pf@artcom-gmbh.de> -# for selftesting -try: - import fintl - _ = fintl.gettext -except ImportError: - def _(s): return s - - -__doc__ = _("""pygettext -- Python equivalent of xgettext(1) +"""pygettext -- Python equivalent of xgettext(1) Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the internationalization of C programs. Most of these tools are independent of @@ -65,7 +57,13 @@ Options: -E --escape - replace non-ASCII characters with octal escape sequences. + Replace non-ASCII characters with octal escape sequences. + + -D + --docstrings + Extract module, class, method, and function docstrings. These do not + need to be wrapped in _() markers, and in fact cannot be for Python to + consider them docstrings. -h --help @@ -93,6 +91,15 @@ Options: each msgid. The style of comments is controlled by the -S/--style option. This is the default. + -o filename + --output=filename + Rename the default output file from messages.pot to filename. If + filename is `-' then the output is sent to standard out. + + -p dir + --output-dir=dir + Output files will be placed in directory dir. + -S stylename --style stylename Specify which style to use for location comments. Two styles are @@ -103,15 +110,6 @@ Options: The style name is case insensitive. GNU style is the default. - -o filename - --output=filename - Rename the default output file from messages.pot to filename. If - filename is `-' then the output is sent to standard out. - - -p dir - --output-dir=dir - Output files will be placed in directory dir. - -v --verbose Print the names of the files being processed. @@ -132,7 +130,7 @@ Options: If `inputfile' is -, standard input is read. -""") +""" import os import sys @@ -140,7 +138,14 @@ import time import getopt import tokenize -__version__ = '1.1' +# for selftesting +try: + import fintl + _ = fintl.gettext +except ImportError: + def _(s): return s + +__version__ = '1.2' default_keywords = ['_'] DEFAULTKEYWORDS = ', '.join(default_keywords) @@ -171,9 +176,9 @@ msgstr "" def usage(code, msg=''): - print __doc__ % globals() + print >> sys.stderr, _(__doc__) % globals() if msg: - print msg + print >> sys.stderr, msg sys.exit(code) @@ -239,15 +244,48 @@ class TokenEater: self.__state = self.__waiting self.__data = [] self.__lineno = -1 + self.__freshmodule = 1 def __call__(self, ttype, tstring, stup, etup, line): # dispatch +## import token +## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \ +## 'tstring:', tstring self.__state(ttype, tstring, stup[0]) def __waiting(self, ttype, tstring, lineno): + # Do docstring extractions, if enabled + if self.__options.docstrings: + # module docstring? + if self.__freshmodule: + if ttype == tokenize.STRING: + self.__addentry(safe_eval(tstring), lineno) + self.__freshmodule = 0 + elif ttype not in (tokenize.COMMENT, tokenize.NL): + self.__freshmodule = 0 + return + # class docstring? + if ttype == tokenize.NAME and tstring in ('class', 'def'): + self.__state = self.__suiteseen + return if ttype == tokenize.NAME and tstring in self.__options.keywords: self.__state = self.__keywordseen + def __suiteseen(self, ttype, tstring, lineno): + # ignore anything until we see the colon + if ttype == tokenize.OP and tstring == ':': + self.__state = self.__suitedocstring + + def __suitedocstring(self, ttype, tstring, lineno): + # ignore any intervening noise + if ttype == tokenize.STRING: + self.__addentry(safe_eval(tstring), lineno) + self.__state = self.__waiting + elif ttype not in (tokenize.NEWLINE, tokenize.INDENT, + tokenize.COMMENT): + # there was no class docstring + self.__state = self.__waiting + def __keywordseen(self, ttype, tstring, lineno): if ttype == tokenize.OP and tstring == '(': self.__data = [] @@ -263,29 +301,28 @@ class TokenEater: # of messages seen. Reset state for the next batch. If there # were no strings inside _(), then just ignore this entry. if self.__data: - msg = EMPTYSTRING.join(self.__data) - if not msg in self.__options.toexclude: - entry = (self.__curfile, self.__lineno) - linenos = self.__messages.get(msg) - if linenos is None: - self.__messages[msg] = [entry] - else: - linenos.append(entry) + self.__addentry(EMPTYSTRING.join(self.__data)) self.__state = self.__waiting elif ttype == tokenize.STRING: self.__data.append(safe_eval(tstring)) # TBD: should we warn if we seen anything else? + def __addentry(self, msg, lineno=None): + if lineno is None: + lineno = self.__lineno + if not msg in self.__options.toexclude: + entry = (self.__curfile, lineno) + self.__messages.setdefault(msg, []).append(entry) + def set_filename(self, filename): self.__curfile = filename def write(self, fp): options = self.__options timestamp = time.ctime(time.time()) - # common header - # The time stamp in the header doesn't have the same format - # as that generated by xgettext... - print >>fp, pot_header % {'time': timestamp, 'version': __version__} + # The time stamp in the header doesn't have the same format as that + # generated by xgettext... + print >> fp, pot_header % {'time': timestamp, 'version': __version__} for k, v in self.__messages.items(): if not options.writelocations: pass @@ -304,13 +341,14 @@ class TokenEater: if len(locline) + len(s) <= options.width: locline = locline + s else: - print >>fp, locline + print >> fp, locline locline = "#:" + s if len(locline) > 2: - print >>fp, locline + print >> fp, locline # TBD: sorting, normalizing - print >>fp, 'msgid', normalize(k) - print >>fp, 'msgstr ""\n' + print >> fp, 'msgid', normalize(k) + print >> fp, 'msgstr ""\n' + def main(): @@ -318,11 +356,12 @@ def main(): try: opts, args = getopt.getopt( sys.argv[1:], - 'ad:Ehk:Kno:p:S:Vvw:x:', + 'ad:DEhk:Kno:p:S:Vvw:x:', ['extract-all', 'default-domain', 'escape', 'help', 'keyword=', 'no-default-keywords', 'add-location', 'no-location', 'output=', 'output-dir=', 'style=', 'verbose', 'version', 'width=', 'exclude-file=', + 'docstrings', ]) except getopt.error, msg: usage(1, msg) @@ -343,6 +382,7 @@ def main(): verbose = 0 width = 78 excludefilename = '' + docstrings = 0 options = Options() locations = {'gnu' : options.GNU, @@ -359,6 +399,8 @@ def main(): options.outfile = arg + '.pot' elif opt in ('-E', '--escape'): options.escape = 1 + elif opt in ('-D', '--docstrings'): + options.docstrings = 1 elif opt in ('-k', '--keyword'): options.keywords.append(arg) elif opt in ('-K', '--no-default-keywords'): |