summaryrefslogtreecommitdiffstats
path: root/src/engine/SCons
diff options
context:
space:
mode:
authorSteven Knight <knight@baldmt.com>2008-12-12 06:16:31 (GMT)
committerSteven Knight <knight@baldmt.com>2008-12-12 06:16:31 (GMT)
commit6554d8631debd788d6bad226d098daee080ca20e (patch)
treef468f63915d4ba1b0ea3cc2d064d5ce922cfe6e4 /src/engine/SCons
parent8e1e691178fb24207d0e073a8c67bd8810211396 (diff)
downloadSCons-6554d8631debd788d6bad226d098daee080ca20e.zip
SCons-6554d8631debd788d6bad226d098daee080ca20e.tar.gz
SCons-6554d8631debd788d6bad226d098daee080ca20e.tar.bz2
Issue 2255: Handle scanning of UTF-8 and UTF-16 files. (Greg Spencer)
Diffstat (limited to 'src/engine/SCons')
-rw-r--r--src/engine/SCons/Environment.py2
-rw-r--r--src/engine/SCons/Node/FS.py61
-rw-r--r--src/engine/SCons/Node/FSTests.py81
-rw-r--r--src/engine/SCons/SConfTests.py2
-rw-r--r--src/engine/SCons/Scanner/D.py2
-rw-r--r--src/engine/SCons/Scanner/Fortran.py6
-rw-r--r--src/engine/SCons/Scanner/LaTeX.py2
-rw-r--r--src/engine/SCons/Scanner/ScannerTests.py2
-rw-r--r--src/engine/SCons/Scanner/__init__.py2
-rw-r--r--src/engine/SCons/Tool/FortranCommon.py2
-rw-r--r--src/engine/SCons/Tool/jar.py6
-rw-r--r--src/engine/SCons/Tool/qt.py10
-rw-r--r--src/engine/SCons/Tool/tex.py8
13 files changed, 155 insertions, 31 deletions
diff --git a/src/engine/SCons/Environment.py b/src/engine/SCons/Environment.py
index f6123b9..2304f2f 100644
--- a/src/engine/SCons/Environment.py
+++ b/src/engine/SCons/Environment.py
@@ -429,7 +429,7 @@ class SubstitutionEnvironment:
self._dict[key] = value
def get(self, key, default=None):
- "Emulates the get() method of dictionaries."""
+ """Emulates the get() method of dictionaries."""
return self._dict.get(key, default)
def has_key(self, key):
diff --git a/src/engine/SCons/Node/FS.py b/src/engine/SCons/Node/FS.py
index 98efc7a..bdc1bfd 100644
--- a/src/engine/SCons/Node/FS.py
+++ b/src/engine/SCons/Node/FS.py
@@ -35,8 +35,9 @@ that can be used by scripts or modules looking for the canonical default.
__revision__ = "__FILE__ __REVISION__ __DATE__ __DEVELOPER__"
-import fnmatch
from itertools import izip
+import cStringIO
+import fnmatch
import os
import os.path
import re
@@ -45,7 +46,11 @@ import stat
import string
import sys
import time
-import cStringIO
+
+try:
+ import codecs
+except ImportError:
+ pass
import SCons.Action
from SCons.Debug import logInstanceCreation
@@ -876,11 +881,8 @@ class Entry(Base):
return self.get_suffix()
def get_contents(self):
- """Fetch the contents of the entry.
-
- Since this should return the real contents from the file
- system, we check to see into what sort of subclass we should
- morph this Entry."""
+ """Fetch the contents of the entry. Returns the exact binary
+ contents of the file."""
try:
self = self.disambiguate(must_exist=1)
except SCons.Errors.UserError:
@@ -893,6 +895,24 @@ class Entry(Base):
else:
return self.get_contents()
+ def get_text_contents(self):
+ """Fetch the decoded text contents of a Unicode encoded Entry.
+
+ Since this should return the text contents from the file
+ system, we check to see into what sort of subclass we should
+ morph this Entry."""
+ try:
+ self = self.disambiguate(must_exist=1)
+ except SCons.Errors.UserError:
+ # There was nothing on disk with which to disambiguate
+ # this entry. Leave it as an Entry, but return a null
+ # string so calls to get_text_contents() in emitters and
+ # the like (e.g. in qt.py) don't have to disambiguate by
+ # hand or catch the exception.
+ return ''
+ else:
+ return self.get_text_contents()
+
def must_be_same(self, klass):
"""Called to make sure a Node is a Dir. Since we're an
Entry, we can morph into one."""
@@ -1598,13 +1618,18 @@ class Dir(Base):
"""A directory does not get scanned."""
return None
+ def get_text_contents(self):
+ """We already emit things in text, so just return the binary
+ version."""
+ return self.get_contents()
+
def get_contents(self):
"""Return content signatures and names of all our children
separated by new-lines. Ensure that the nodes are sorted."""
contents = []
name_cmp = lambda a, b: cmp(a.name, b.name)
sorted_children = self.children()[:]
- sorted_children.sort(name_cmp)
+ sorted_children.sort(name_cmp)
for node in sorted_children:
contents.append('%s %s\n' % (node.get_csig(), node.name))
return string.join(contents, '')
@@ -2236,12 +2261,28 @@ class File(Base):
return ''
fname = self.rfile().abspath
try:
- r = open(fname, "rb").read()
+ contents = open(fname, "rb").read()
except EnvironmentError, e:
if not e.filename:
e.filename = fname
raise
- return r
+ return contents
+
+ try:
+ import codecs
+ except ImportError:
+ get_text_contents = get_contents
+ else:
+ # This attempts to figure out what the encoding of the text is
+ # based upon the BOM bytes, and then decodes the contents so that
+ # it's a valid python string.
+ def get_text_contents(self):
+ contents = self.get_contents()
+ if contents.startswith(codecs.BOM_UTF8):
+ contents = contents.decode('utf-8')
+ elif contents.startswith(codecs.BOM_UTF16):
+ contents = contents.decode('utf-16')
+ return contents
def get_content_hash(self):
"""
diff --git a/src/engine/SCons/Node/FSTests.py b/src/engine/SCons/Node/FSTests.py
index bf6a300..424aa5e 100644
--- a/src/engine/SCons/Node/FSTests.py
+++ b/src/engine/SCons/Node/FSTests.py
@@ -1192,6 +1192,18 @@ class FSTestCase(_tempdirTestCase):
f1 = fs.File(test.workpath("binary_file"))
assert f1.get_contents() == "Foo\x1aBar", f1.get_contents()
+ try:
+ # TODO(1.5)
+ eval('test_string = u"Foo\x1aBar"')
+ except SyntaxError:
+ pass
+ else:
+ # This tests to make sure we can decode UTF-8 text files.
+ test.write("utf8_file", test_string.encode('utf-8'))
+ f1 = fs.File(test.workpath("utf8_file"))
+ assert eval('f1.get_text_contents() == u"Foo\x1aBar"'), \
+ f1.get_text_contents()
+
def nonexistent(method, s):
try:
x = method(s, create = 0)
@@ -1257,18 +1269,44 @@ class FSTestCase(_tempdirTestCase):
finally:
test.unlink("file")
+ # test Entry.get_text_contents()
+ e = fs.Entry('does_not_exist')
+ c = e.get_text_contents()
+ assert c == "", c
+ assert e.__class__ == SCons.Node.FS.Entry
+
+ test.write("file", "file\n")
+ try:
+ e = fs.Entry('file')
+ c = e.get_text_contents()
+ assert c == "file\n", c
+ assert e.__class__ == SCons.Node.FS.File
+ finally:
+ test.unlink("file")
+
test.subdir("dir")
e = fs.Entry('dir')
c = e.get_contents()
assert c == "", c
assert e.__class__ == SCons.Node.FS.Dir
+ c = e.get_text_contents()
+ try:
+ eval('assert c == u"", c')
+ except SyntaxError:
+ assert c == ""
+
if hasattr(os, 'symlink'):
os.symlink('nonexistent', test.workpath('dangling_symlink'))
e = fs.Entry('dangling_symlink')
c = e.get_contents()
assert e.__class__ == SCons.Node.FS.Entry, e.__class__
assert c == "", c
+ c = e.get_text_contents()
+ try:
+ eval('assert c == u"", c')
+ except SyntaxError:
+ assert c == "", c
test.write("tstamp", "tstamp\n")
try:
@@ -1712,6 +1750,7 @@ class DirTestCase(_tempdirTestCase):
files = string.split(d.get_contents(), '\n')
assert e.get_contents() == '', e.get_contents()
+ assert e.get_text_contents() == '', e.get_text_contents()
assert e.get_csig()+" empty" == files[0], files
assert f.get_csig()+" f" == files[1], files
assert g.get_csig()+" g" == files[2], files
@@ -2758,6 +2797,48 @@ class RepositoryTestCase(_tempdirTestCase):
finally:
test.unlink(["rep3", "contents"])
+ def test_get_text_contents(self):
+ """Ensure get_text_contents() returns text contents from
+ Repositories"""
+ fs = self.fs
+ test = self.test
+
+ # Use a test string that has a file terminator in it to make
+ # sure we read the entire file, regardless of its contents.
+ try:
+ eval('test_string = u"Con\x1aTents\n"')
+ except SyntaxError:
+ import UserString
+ class FakeUnicodeString(UserString.UserString):
+ def encode(self, encoding):
+ return str(self)
+ test_string = FakeUnicodeString("Con\x1aTents\n")
+
+
+ # Test with ASCII.
+ test.write(["rep3", "contents"], test_string.encode('ascii'))
+ try:
+ c = fs.File("contents").get_text_contents()
+ assert test_string == c, "got %s" % repr(c)
+ finally:
+ test.unlink(["rep3", "contents"])
+
+ # Test with utf-8
+ test.write(["rep3", "contents"], test_string.encode('utf-8'))
+ try:
+ c = fs.File("contents").get_text_contents()
+ assert test_string == c, "got %s" % repr(c)
+ finally:
+ test.unlink(["rep3", "contents"])
+
+ # Test with utf-16
+ test.write(["rep3", "contents"], test_string.encode('utf-16'))
+ try:
+ c = fs.File("contents").get_text_contents()
+ assert test_string == c, "got %s" % repr(c)
+ finally:
+ test.unlink(["rep3", "contents"])
+
#def test_is_up_to_date(self):
diff --git a/src/engine/SCons/SConfTests.py b/src/engine/SCons/SConfTests.py
index cef3889..9974485 100644
--- a/src/engine/SCons/SConfTests.py
+++ b/src/engine/SCons/SConfTests.py
@@ -335,7 +335,7 @@ int main() {
self.scons_env[comp] = oldcomp
self.scons_env['%sFLAGS' % comp] = 'qwertyuiop'
r = func()
- assert not r, "%s worked with %sFLAGS = qwertyuiop ?" % name
+ assert not r, "%s worked with %sFLAGS = qwertyuiop ?" % (name, comp)
def test_CheckCC(self):
"""Test SConf.CheckCC()
diff --git a/src/engine/SCons/Scanner/D.py b/src/engine/SCons/Scanner/D.py
index bfbcd5d..dc3478a 100644
--- a/src/engine/SCons/Scanner/D.py
+++ b/src/engine/SCons/Scanner/D.py
@@ -63,6 +63,6 @@ class D(SCons.Scanner.Classic):
def find_include_names(self, node):
includes = []
- for i in self.cre.findall(node.get_contents()):
+ for i in self.cre.findall(node.get_text_contents()):
includes = includes + self.cre2.findall(i)
return includes
diff --git a/src/engine/SCons/Scanner/Fortran.py b/src/engine/SCons/Scanner/Fortran.py
index 31a1e16..d2358ba 100644
--- a/src/engine/SCons/Scanner/Fortran.py
+++ b/src/engine/SCons/Scanner/Fortran.py
@@ -84,11 +84,11 @@ class F90Scanner(SCons.Scanner.Classic):
mods_and_includes = node.includes
else:
# retrieve all included filenames
- includes = self.cre_incl.findall(node.get_contents())
+ includes = self.cre_incl.findall(node.get_text_contents())
# retrieve all USE'd module names
- modules = self.cre_use.findall(node.get_contents())
+ modules = self.cre_use.findall(node.get_text_contents())
# retrieve all defined module names
- defmodules = self.cre_def.findall(node.get_contents())
+ defmodules = self.cre_def.findall(node.get_text_contents())
# Remove all USE'd module names that are defined in the same file
d = {}
diff --git a/src/engine/SCons/Scanner/LaTeX.py b/src/engine/SCons/Scanner/LaTeX.py
index c499ea5..db7f555 100644
--- a/src/engine/SCons/Scanner/LaTeX.py
+++ b/src/engine/SCons/Scanner/LaTeX.py
@@ -285,7 +285,7 @@ class LaTeX(SCons.Scanner.Base):
if node.includes != None:
includes = node.includes
else:
- includes = self.cre.findall(node.get_contents())
+ includes = self.cre.findall(node.get_text_contents())
# 1. Split comma-separated lines, e.g.
# ('bibliography', 'phys,comp')
# should become two entries
diff --git a/src/engine/SCons/Scanner/ScannerTests.py b/src/engine/SCons/Scanner/ScannerTests.py
index 6e9286a..f6750dc 100644
--- a/src/engine/SCons/Scanner/ScannerTests.py
+++ b/src/engine/SCons/Scanner/ScannerTests.py
@@ -481,6 +481,8 @@ class ClassicTestCase(unittest.TestCase):
return self._exists
def get_contents(self):
return self._contents
+ def get_text_contents(self):
+ return self._contents
def get_dir(self):
return self._dir
diff --git a/src/engine/SCons/Scanner/__init__.py b/src/engine/SCons/Scanner/__init__.py
index 924b271..2d53cad 100644
--- a/src/engine/SCons/Scanner/__init__.py
+++ b/src/engine/SCons/Scanner/__init__.py
@@ -347,7 +347,7 @@ class Classic(Current):
return SCons.Node.FS._my_normcase(include)
def find_include_names(self, node):
- return self.cre.findall(node.get_contents())
+ return self.cre.findall(node.get_text_contents())
def scan(self, node, path=()):
diff --git a/src/engine/SCons/Tool/FortranCommon.py b/src/engine/SCons/Tool/FortranCommon.py
index 825cbe5..bf32ffa 100644
--- a/src/engine/SCons/Tool/FortranCommon.py
+++ b/src/engine/SCons/Tool/FortranCommon.py
@@ -67,7 +67,7 @@ def _fortranEmitter(target, source, env):
mod_regex = """(?i)^\s*MODULE\s+(?!PROCEDURE)(\w+)"""
cre = re.compile(mod_regex,re.M)
# Retrieve all USE'd module names
- modules = cre.findall(node.get_contents())
+ modules = cre.findall(node.get_text_contents())
# Remove unique items from the list
modules = SCons.Util.unique(modules)
# Convert module name to a .mod filename
diff --git a/src/engine/SCons/Tool/jar.py b/src/engine/SCons/Tool/jar.py
index 6594ecc..7018c37 100644
--- a/src/engine/SCons/Tool/jar.py
+++ b/src/engine/SCons/Tool/jar.py
@@ -49,7 +49,7 @@ def jarSources(target, source, env, for_signature):
jarchdir = env.fs.Dir(jarchdir)
result = []
for src in source:
- contents = src.get_contents()
+ contents = src.get_text_contents()
if contents[:16] != "Manifest-Version":
if jarchdir_set:
_chdir = jarchdir
@@ -70,7 +70,7 @@ def jarSources(target, source, env, for_signature):
def jarManifest(target, source, env, for_signature):
"""Look in sources for a manifest file, if any."""
for src in source:
- contents = src.get_contents()
+ contents = src.get_text_contents()
if contents[:16] == "Manifest-Version":
return src
return ''
@@ -80,7 +80,7 @@ def jarFlags(target, source, env, for_signature):
flag is specified."""
jarflags = env.subst('$JARFLAGS', target=target, source=source)
for src in source:
- contents = src.get_contents()
+ contents = src.get_text_contents()
if contents[:16] == "Manifest-Version":
if not 'm' in jarflags:
return jarflags + 'm'
diff --git a/src/engine/SCons/Tool/qt.py b/src/engine/SCons/Tool/qt.py
index d67cddb..e2aa441 100644
--- a/src/engine/SCons/Tool/qt.py
+++ b/src/engine/SCons/Tool/qt.py
@@ -138,8 +138,8 @@ class _Automoc:
print "scons: qt: '%s' is no cxx file. Discarded." % str(cpp)
# c or fortran source
continue
- #cpp_contents = comment.sub('', cpp.get_contents())
- cpp_contents = cpp.get_contents()
+ #cpp_contents = comment.sub('', cpp.get_text_contents())
+ cpp_contents = cpp.get_text_contents()
h=None
for h_ext in header_extensions:
# try to find the header file in the corresponding source
@@ -149,8 +149,8 @@ class _Automoc:
if h:
if debug:
print "scons: qt: Scanning '%s' (header of '%s')" % (str(h), str(cpp))
- #h_contents = comment.sub('', h.get_contents())
- h_contents = h.get_contents()
+ #h_contents = comment.sub('', h.get_text_contents())
+ h_contents = h.get_text_contents()
break
if not h and debug:
print "scons: qt: no header for '%s'." % (str(cpp))
@@ -221,7 +221,7 @@ def uicScannerFunc(node, env, path):
lookout = []
lookout.extend(env['CPPPATH'])
lookout.append(str(node.rfile().dir))
- includes = re.findall("<include.*?>(.*?)</include>", node.get_contents())
+ includes = re.findall("<include.*?>(.*?)</include>", node.get_text_contents())
result = []
for incFile in includes:
dep = env.FindFile(incFile,lookout)
diff --git a/src/engine/SCons/Tool/tex.py b/src/engine/SCons/Tool/tex.py
index 49da3d0..15e2d3e 100644
--- a/src/engine/SCons/Tool/tex.py
+++ b/src/engine/SCons/Tool/tex.py
@@ -198,7 +198,7 @@ def InternalLaTeXAuxAction(XXXLaTeXAction, target = None, source= None, env=None
# we have to run makeindex at least once to keep the build
# happy even if there is no index.
# Same for glossaries and nomenclature
- src_content = source[0].get_contents()
+ src_content = source[0].get_text_contents()
run_makeindex = makeindex_re.search(src_content) and not os.path.exists(targetbase + '.idx')
run_nomenclature = makenomenclature_re.search(src_content) and not os.path.exists(targetbase + '.nlo')
run_glossary = makeglossary_re.search(src_content) and not os.path.exists(targetbase + '.glo')
@@ -373,7 +373,7 @@ LaTeX_re = re.compile("\\\\document(style|class)")
def is_LaTeX(flist):
# Scan a file list to decide if it's TeX- or LaTeX-flavored.
for f in flist:
- content = f.get_contents()
+ content = f.get_text_contents()
if LaTeX_re.search(content):
return 1
return 0
@@ -422,7 +422,7 @@ def tex_pdf_emitter(target, source, env):
def ScanFiles(theFile, target, paths, file_tests, file_tests_search, env, graphics_extensions, targetdir):
# for theFile (a Node) update any file_tests and search for graphics files
# then find all included files and call ScanFiles for each of them
- content = theFile.get_contents()
+ content = theFile.get_text_contents()
if Verbose:
print " scanning ",str(theFile)
@@ -498,7 +498,7 @@ def tex_emitter_core(target, source, env, graphics_extensions):
env.Clean(target[0],auxfilename)
env.Clean(target[0],logfilename)
- content = source[0].get_contents()
+ content = source[0].get_text_contents()
idx_exists = os.path.exists(targetbase + '.idx')
nlo_exists = os.path.exists(targetbase + '.nlo')