summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBarry Warsaw <barry@python.org>2000-10-27 04:56:28 (GMT)
committerBarry Warsaw <barry@python.org>2000-10-27 04:56:28 (GMT)
commit08a8a355bebc4905f0bbd211bed4f53294e0d7b2 (patch)
tree39c1543cf1281dd099d2a9936d8bd39ced785520
parent3aecfc96811c6d85632e50fe706bb5d02d376c00 (diff)
downloadcpython-08a8a355bebc4905f0bbd211bed4f53294e0d7b2.zip
cpython-08a8a355bebc4905f0bbd211bed4f53294e0d7b2.tar.gz
cpython-08a8a355bebc4905f0bbd211bed4f53294e0d7b2.tar.bz2
Added the -D/--docstrings option for extraction of unmarked module,
class, method, and function docstrings.
-rwxr-xr-xTools/i18n/pygettext.py122
1 files changed, 82 insertions, 40 deletions
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index f48ee78..834ba48 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -4,15 +4,7 @@
# minimally patched to make it even more xgettext compatible
# by Peter Funk <pf@artcom-gmbh.de>
-# for selftesting
-try:
- import fintl
- _ = fintl.gettext
-except ImportError:
- def _(s): return s
-
-
-__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
+"""pygettext -- Python equivalent of xgettext(1)
Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
internationalization of C programs. Most of these tools are independent of
@@ -65,7 +57,13 @@ Options:
-E
--escape
- replace non-ASCII characters with octal escape sequences.
+ Replace non-ASCII characters with octal escape sequences.
+
+ -D
+ --docstrings
+ Extract module, class, method, and function docstrings. These do not
+ need to be wrapped in _() markers, and in fact cannot be for Python to
+ consider them docstrings.
-h
--help
@@ -93,6 +91,15 @@ Options:
each msgid. The style of comments is controlled by the -S/--style
option. This is the default.
+ -o filename
+ --output=filename
+ Rename the default output file from messages.pot to filename. If
+ filename is `-' then the output is sent to standard out.
+
+ -p dir
+ --output-dir=dir
+ Output files will be placed in directory dir.
+
-S stylename
--style stylename
Specify which style to use for location comments. Two styles are
@@ -103,15 +110,6 @@ Options:
The style name is case insensitive. GNU style is the default.
- -o filename
- --output=filename
- Rename the default output file from messages.pot to filename. If
- filename is `-' then the output is sent to standard out.
-
- -p dir
- --output-dir=dir
- Output files will be placed in directory dir.
-
-v
--verbose
Print the names of the files being processed.
@@ -132,7 +130,7 @@ Options:
If `inputfile' is -, standard input is read.
-""")
+"""
import os
import sys
@@ -140,7 +138,14 @@ import time
import getopt
import tokenize
-__version__ = '1.1'
+# for selftesting
+try:
+ import fintl
+ _ = fintl.gettext
+except ImportError:
+ def _(s): return s
+
+__version__ = '1.2'
default_keywords = ['_']
DEFAULTKEYWORDS = ', '.join(default_keywords)
@@ -171,9 +176,9 @@ msgstr ""
def usage(code, msg=''):
- print __doc__ % globals()
+ print >> sys.stderr, _(__doc__) % globals()
if msg:
- print msg
+ print >> sys.stderr, msg
sys.exit(code)
@@ -239,15 +244,48 @@ class TokenEater:
self.__state = self.__waiting
self.__data = []
self.__lineno = -1
+ self.__freshmodule = 1
def __call__(self, ttype, tstring, stup, etup, line):
# dispatch
+## import token
+## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
+## 'tstring:', tstring
self.__state(ttype, tstring, stup[0])
def __waiting(self, ttype, tstring, lineno):
+ # Do docstring extractions, if enabled
+ if self.__options.docstrings:
+ # module docstring?
+ if self.__freshmodule:
+ if ttype == tokenize.STRING:
+ self.__addentry(safe_eval(tstring), lineno)
+ self.__freshmodule = 0
+ elif ttype not in (tokenize.COMMENT, tokenize.NL):
+ self.__freshmodule = 0
+ return
+ # class docstring?
+ if ttype == tokenize.NAME and tstring in ('class', 'def'):
+ self.__state = self.__suiteseen
+ return
if ttype == tokenize.NAME and tstring in self.__options.keywords:
self.__state = self.__keywordseen
+ def __suiteseen(self, ttype, tstring, lineno):
+ # ignore anything until we see the colon
+ if ttype == tokenize.OP and tstring == ':':
+ self.__state = self.__suitedocstring
+
+ def __suitedocstring(self, ttype, tstring, lineno):
+ # ignore any intervening noise
+ if ttype == tokenize.STRING:
+ self.__addentry(safe_eval(tstring), lineno)
+ self.__state = self.__waiting
+ elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
+ tokenize.COMMENT):
+ # there was no class docstring
+ self.__state = self.__waiting
+
def __keywordseen(self, ttype, tstring, lineno):
if ttype == tokenize.OP and tstring == '(':
self.__data = []
@@ -263,29 +301,28 @@ class TokenEater:
# of messages seen. Reset state for the next batch. If there
# were no strings inside _(), then just ignore this entry.
if self.__data:
- msg = EMPTYSTRING.join(self.__data)
- if not msg in self.__options.toexclude:
- entry = (self.__curfile, self.__lineno)
- linenos = self.__messages.get(msg)
- if linenos is None:
- self.__messages[msg] = [entry]
- else:
- linenos.append(entry)
+ self.__addentry(EMPTYSTRING.join(self.__data))
self.__state = self.__waiting
elif ttype == tokenize.STRING:
self.__data.append(safe_eval(tstring))
# TBD: should we warn if we seen anything else?
+ def __addentry(self, msg, lineno=None):
+ if lineno is None:
+ lineno = self.__lineno
+ if not msg in self.__options.toexclude:
+ entry = (self.__curfile, lineno)
+ self.__messages.setdefault(msg, []).append(entry)
+
def set_filename(self, filename):
self.__curfile = filename
def write(self, fp):
options = self.__options
timestamp = time.ctime(time.time())
- # common header
- # The time stamp in the header doesn't have the same format
- # as that generated by xgettext...
- print >>fp, pot_header % {'time': timestamp, 'version': __version__}
+ # The time stamp in the header doesn't have the same format as that
+ # generated by xgettext...
+ print >> fp, pot_header % {'time': timestamp, 'version': __version__}
for k, v in self.__messages.items():
if not options.writelocations:
pass
@@ -304,13 +341,14 @@ class TokenEater:
if len(locline) + len(s) <= options.width:
locline = locline + s
else:
- print >>fp, locline
+ print >> fp, locline
locline = "#:" + s
if len(locline) > 2:
- print >>fp, locline
+ print >> fp, locline
# TBD: sorting, normalizing
- print >>fp, 'msgid', normalize(k)
- print >>fp, 'msgstr ""\n'
+ print >> fp, 'msgid', normalize(k)
+ print >> fp, 'msgstr ""\n'
+
def main():
@@ -318,11 +356,12 @@ def main():
try:
opts, args = getopt.getopt(
sys.argv[1:],
- 'ad:Ehk:Kno:p:S:Vvw:x:',
+ 'ad:DEhk:Kno:p:S:Vvw:x:',
['extract-all', 'default-domain', 'escape', 'help',
'keyword=', 'no-default-keywords',
'add-location', 'no-location', 'output=', 'output-dir=',
'style=', 'verbose', 'version', 'width=', 'exclude-file=',
+ 'docstrings',
])
except getopt.error, msg:
usage(1, msg)
@@ -343,6 +382,7 @@ def main():
verbose = 0
width = 78
excludefilename = ''
+ docstrings = 0
options = Options()
locations = {'gnu' : options.GNU,
@@ -359,6 +399,8 @@ def main():
options.outfile = arg + '.pot'
elif opt in ('-E', '--escape'):
options.escape = 1
+ elif opt in ('-D', '--docstrings'):
+ options.docstrings = 1
elif opt in ('-k', '--keyword'):
options.keywords.append(arg)
elif opt in ('-K', '--no-default-keywords'):