summary | refs | log | tree | commit | diff | stats
path: root/Tools
diff options
context:
space:
mode:
author: Barry Warsaw <barry@python.org>, 2000-02-26 20:56:47 (GMT)
committer: Barry Warsaw <barry@python.org>, 2000-02-26 20:56:47 (GMT)
commit: c8f0892d1236df81af1811cf182692f28c85f916 (patch)
tree: c73dae359594c2444adb00666b9a265a24405f97 /Tools
parent: abc52169b73844d94de244a0273f635993189b94 (diff)
download: cpython-c8f0892d1236df81af1811cf182692f28c85f916.zip
cpython-c8f0892d1236df81af1811cf182692f28c85f916.tar.gz
cpython-c8f0892d1236df81af1811cf182692f28c85f916.tar.bz2
Changes submitted by Peter Funk (some fixes/additions by B.Warsaw) to
make pygettext more compatible with GNU xgettext, specifically:
Added -E/--escape for allowing pass-thru of iso8859-1 characters above 7 bits.
Added -o/--output option for renaming the output file from messages.pot (there's overlap with -d/--default-domain, but GNU xgettext has them both).
Added -p/--output-dir for specifying the output directory for messages.pot.
Added -V/--version for printing the version number.
Added -w/--width for specifying the output page width (this is because now pygettext, like GNU xgettext, will put several locations on the same line to cut down on vertical space).
Added -x/--exclude-file for specifying a list of strings that are not to be extracted from the input files.
Bumped version number to 1.0.
Try to import fintl and use fintl.gettext as _ if available. The fallback is to use the identity definition of _().
Moved the escape creation to a function make_escapes() so that its behavior can be controlled by the -E option.
__openseen(): Support the -x option.
write(): Support -w option and vertical space preserving feature.
main(): Support new options.
Diffstat (limited to 'Tools')
-rwxr-xr-x Tools/i18n/pygettext.py 180
1 files changed, 140 insertions, 40 deletions
diff --git a/Tools/i18n/pygettext.py b/Tools/i18n/pygettext.py
index fcd6b95..4ff4962 100755
--- a/Tools/i18n/pygettext.py
+++ b/Tools/i18n/pygettext.py
@@ -1,5 +1,8 @@
#! /usr/bin/env python
# Originally written by Barry Warsaw <bwarsaw@python.org>
+#
+# minimally patched to make it even more xgettext compatible
+# by Peter Funk <pf@artcom-gmbh.de>
"""pygettext -- Python equivalent of xgettext(1)
@@ -35,7 +38,8 @@ below for how to augment this.
[2] http://www.gnu.org/software/gettext/gettext.html
NOTE: pygettext attempts to be option and feature compatible with GNU xgettext
-where ever possible.
+where ever possible. However some options are still missing or are not fully
+implemented.
Usage: pygettext [options] filename ...
@@ -45,9 +49,17 @@ Options:
--extract-all
Extract all strings
- -d default-domain
- --default-domain=default-domain
- Rename the default output file from messages.pot to default-domain.pot
+ -d name
+ --default-domain=name
+ Rename the default output file from messages.pot to name.pot
+
+ -E
+ --escape
+ replace non-ASCII characters with octal escape sequences.
+
+ -h
+ --help
+ print this help message and exit
-k [word]
--keyword[=word]
@@ -73,13 +85,31 @@ Options:
If style is omitted, Gnu is used. The style name is case
insensitive. By default, locations are included.
+ -o filename
+ --output=filename
+ Rename the default output file from messages.pot to filename.
+
+ -p dir
+ --output-dir=dir
+ Output files will be placed in directory dir.
+
-v
--verbose
Print the names of the files being processed.
- --help
- -h
- print this help message and exit
+ -V
+ --version
+ Print the version of pygettext and exit.
+
+ -w columns
+ --width=columns
+ Set width of output to columns.
+
+ -x filename
+ --exclude-file=filename
+ Specify a file that contains a list of strings that are not be
+ extracted from the input files. Each string to be excluded must
+ appear on a line by itself in the file.
"""
@@ -90,12 +120,16 @@ import time
import getopt
import tokenize
-__version__ = '0.2'
+__version__ = '1.0'
# for selftesting
-def _(s): return s
+try:
+ import fintl
+ _ = fintl.gettext
+except ImportError:
+ def _(s): return s
# The normal pot-file header. msgmerge and EMACS' po-mode work better if
@@ -125,21 +159,31 @@ def usage(code, msg=''):
print msg
sys.exit(code)
+
escapes = []
-for i in range(256):
- if i < 32 or i > 127:
- escapes.append("\\%03o" % i)
- else:
- escapes.append(chr(i))
-escapes[ord('\\')] = '\\\\'
-escapes[ord('\t')] = '\\t'
-escapes[ord('\r')] = '\\r'
-escapes[ord('\n')] = '\\n'
-escapes[ord('\"')] = '\\"'
+def make_escapes(pass_iso8859):
+ global escapes
+ for i in range(256):
+ if pass_iso8859:
+ # Allow iso-8859 characters to pass through so that e.g. 'msgid
+ # "Höhe"' would result not result in 'msgid "H\366he"'. Otherwise
+ # we escape any character outside the 32..126 range.
+ i = i % 128
+ if 32 <= i <= 126:
+ escapes.append(chr(i))
+ else:
+ escapes.append("\\%03o" % i)
+ escapes[ord('\\')] = '\\\\'
+ escapes[ord('\t')] = '\\t'
+ escapes[ord('\r')] = '\\r'
+ escapes[ord('\n')] = '\\n'
+ escapes[ord('\"')] = '\\"'
+
def escape(s):
+ global escapes
s = list(s)
for i in range(len(s)):
s[i] = escapes[ord(s[i])]
@@ -200,12 +244,13 @@ class TokenEater:
# were no strings inside _(), then just ignore this entry.
if self.__data:
msg = string.join(self.__data, '')
- entry = (self.__curfile, self.__lineno)
- linenos = self.__messages.get(msg)
- if linenos is None:
- self.__messages[msg] = [entry]
- else:
- linenos.append(entry)
+ if not msg in self.__options.toexclude:
+ entry = (self.__curfile, self.__lineno)
+ linenos = self.__messages.get(msg)
+ if linenos is None:
+ self.__messages[msg] = [entry]
+ else:
+ linenos.append(entry)
self.__state = self.__waiting
elif ttype == tokenize.STRING:
self.__data.append(safe_eval(tstring))
@@ -222,20 +267,30 @@ class TokenEater:
sys.stdout = fp
# The time stamp in the header doesn't have the same format
# as that generated by xgettext...
- print pot_header % {'time': timestamp, 'version':__version__}
+ print pot_header % {'time': timestamp, 'version': __version__}
for k, v in self.__messages.items():
- for filename, lineno in v:
- # location comments are different b/w Solaris and GNU
- d = {'filename': filename,
- 'lineno': lineno}
- if options.location == options.SOLARIS:
+ # location comments are different b/w Solaris and GNU:
+ if options.location == options.SOLARIS:
+ for filename, lineno in v:
+ d = {'filename': filename, 'lineno': lineno}
print _('# File: %(filename)s, line: %(lineno)d') % d
- elif options.location == options.GNU:
- print _('#: %(filename)s:%(lineno)d') % d
+ elif options.location == options.GNU:
+ # fit as many locations on one line, as long as the
+ # resulting line length doesn't exceeds 'options.width'
+ locline = '#:'
+ for filename, lineno in v:
+ d = {'filename': filename, 'lineno': lineno}
+ s = _(' %(filename)s:%(lineno)d') % d
+ if len(locline) + len(s) <= options.width:
+ locline = locline + s
+ else:
+ print locline
+ locline = "#:" + s
+ if len(locline) > 2:
+ print locline
# TBD: sorting, normalizing
print 'msgid', normalize(k)
- print 'msgstr ""'
- print
+ print 'msgstr ""\n'
finally:
sys.stdout = sys.__stdout__
@@ -245,9 +300,11 @@ def main():
try:
opts, args = getopt.getopt(
sys.argv[1:],
- 'k:d:n:hv',
- ['keyword', 'default-domain', 'help',
- 'add-location=', 'no-location', 'verbose'])
+ 'ad:Ehk:n:o:p:Vvw:x:',
+ ['extract-all', 'default-domain', 'escape', 'help', 'keyword',
+ 'add-location', 'no-location', 'output=', 'output-dir=',
+ 'verbose', 'version', 'width=', 'exclude-file=',
+ ])
except getopt.error, msg:
usage(1, msg)
@@ -257,10 +314,15 @@ def main():
GNU = 1
SOLARIS = 2
# defaults
+ extractall = 0 # FIXME: currently this option has no effect at all.
+ escape = 0
keywords = []
+ outpath = ''
outfile = 'messages.pot'
location = GNU
verbose = 0
+ width = 78
+ excludefilename = ''
options = Options()
locations = {'gnu' : options.GNU,
@@ -271,12 +333,16 @@ def main():
for opt, arg in opts:
if opt in ('-h', '--help'):
usage(0)
+ elif opt in ('-a', '--extract-all'):
+ options.extractall = 1
+ elif opt in ('-d', '--default-domain'):
+ options.outfile = arg + '.pot'
+ elif opt in ('-E', '--escape'):
+ options.escape = 1
elif opt in ('-k', '--keyword'):
if arg is None:
default_keywords = []
options.keywords.append(arg)
- elif opt in ('-d', '--default-domain'):
- options.outfile = arg + '.pot'
elif opt in ('-n', '--add-location'):
if arg is None:
arg = 'gnu'
@@ -287,12 +353,44 @@ def main():
usage(1, _('Invalid value for --add-location: %(arg)s') % d)
elif opt in ('--no-location',):
options.location = 0
+ elif opt in ('-o', '--output'):
+ options.outfile = arg
+ elif opt in ('-p', '--output-dir'):
+ options.outpath = arg
elif opt in ('-v', '--verbose'):
options.verbose = 1
+ elif opt in ('-V', '--version'):
+ print _('pygettext.py (xgettext for Python) %s') % __version__
+ sys.exit(0)
+ elif opt in ('-w', '--width'):
+ try:
+ options.width = int(arg)
+ except ValueError:
+ d = {'arg':arg}
+ usage(1, _('Invalid value for --width: %(arg)s, must be int')
+ % d)
+ elif opt in ('-x', '--exclude-file'):
+ options.excludefilename = arg
+
+ # calculate escapes
+ make_escapes(options.escapes)
# calculate all keywords
options.keywords.extend(default_keywords)
+ # initialize list of strings to exclude
+ if options.excludefilename:
+ try:
+ fp = open(options.excludefilename)
+ options.toexclude = fp.readlines()
+ fp.close()
+ except IOError:
+ sys.stderr.write(_("Can't read --exclude-file: %s") %
+ options.excludefilename)
+ sys.exit(1)
+ else:
+ options.toexclude = []
+
# slurp through all the files
eater = TokenEater(options)
for filename in args:
@@ -303,6 +401,8 @@ def main():
tokenize.tokenize(fp.readline, eater)
fp.close()
+ if options.outpath:
+ options.outfile = os.path.join(options.outpath, options.outfile)
fp = open(options.outfile, 'w')
eater.write(fp)
fp.close()