summary | refs | log | tree | commit | diff | stats
path: root/src/engine/SCons/Node
diff options
context:
space:
mode:
author: Steven Knight <knight@baldmt.com> 2008-12-12 06:16:31 (GMT)
committer: Steven Knight <knight@baldmt.com> 2008-12-12 06:16:31 (GMT)
commit: 6554d8631debd788d6bad226d098daee080ca20e (patch)
tree: f468f63915d4ba1b0ea3cc2d064d5ce922cfe6e4 /src/engine/SCons/Node
parent: 8e1e691178fb24207d0e073a8c67bd8810211396 (diff)
download: SCons-6554d8631debd788d6bad226d098daee080ca20e.zip
SCons-6554d8631debd788d6bad226d098daee080ca20e.tar.gz
SCons-6554d8631debd788d6bad226d098daee080ca20e.tar.bz2
Issue 2255: Handle scanning of UTF-8 and UTF-16 files. (Greg Spencer)
Diffstat (limited to 'src/engine/SCons/Node')
-rw-r--r-- src/engine/SCons/Node/FS.py 61
-rw-r--r-- src/engine/SCons/Node/FSTests.py 81
2 files changed, 132 insertions, 10 deletions
diff --git a/src/engine/SCons/Node/FS.py b/src/engine/SCons/Node/FS.py
index 98efc7a..bdc1bfd 100644
--- a/src/engine/SCons/Node/FS.py
+++ b/src/engine/SCons/Node/FS.py
@@ -35,8 +35,9 @@ that can be used by scripts or modules looking for the canonical default.
__revision__ = "__FILE__ __REVISION__ __DATE__ __DEVELOPER__"
-import fnmatch
from itertools import izip
+import cStringIO
+import fnmatch
import os
import os.path
import re
@@ -45,7 +46,11 @@ import stat
import string
import sys
import time
-import cStringIO
+
+try:
+ import codecs
+except ImportError:
+ pass
import SCons.Action
from SCons.Debug import logInstanceCreation
@@ -876,11 +881,8 @@ class Entry(Base):
return self.get_suffix()
def get_contents(self):
- """Fetch the contents of the entry.
-
- Since this should return the real contents from the file
- system, we check to see into what sort of subclass we should
- morph this Entry."""
+ """Fetch the contents of the entry. Returns the exact binary
+ contents of the file."""
try:
self = self.disambiguate(must_exist=1)
except SCons.Errors.UserError:
@@ -893,6 +895,24 @@ class Entry(Base):
else:
return self.get_contents()
+ def get_text_contents(self):
+ """Fetch the decoded text contents of a Unicode encoded Entry.
+
+ Since this should return the text contents from the file
+ system, we check to see into what sort of subclass we should
+ morph this Entry."""
+ try:
+ self = self.disambiguate(must_exist=1)
+ except SCons.Errors.UserError:
+ # There was nothing on disk with which to disambiguate
+ # this entry. Leave it as an Entry, but return a null
+ # string so calls to get_text_contents() in emitters and
+ # the like (e.g. in qt.py) don't have to disambiguate by
+ # hand or catch the exception.
+ return ''
+ else:
+ return self.get_text_contents()
+
def must_be_same(self, klass):
"""Called to make sure a Node is a Dir. Since we're an
Entry, we can morph into one."""
@@ -1598,13 +1618,18 @@ class Dir(Base):
"""A directory does not get scanned."""
return None
+ def get_text_contents(self):
+ """We already emit things in text, so just return the binary
+ version."""
+ return self.get_contents()
+
def get_contents(self):
"""Return content signatures and names of all our children
separated by new-lines. Ensure that the nodes are sorted."""
contents = []
name_cmp = lambda a, b: cmp(a.name, b.name)
sorted_children = self.children()[:]
- sorted_children.sort(name_cmp)
+ sorted_children.sort(name_cmp)
for node in sorted_children:
contents.append('%s %s\n' % (node.get_csig(), node.name))
return string.join(contents, '')
@@ -2236,12 +2261,28 @@ class File(Base):
return ''
fname = self.rfile().abspath
try:
- r = open(fname, "rb").read()
+ contents = open(fname, "rb").read()
except EnvironmentError, e:
if not e.filename:
e.filename = fname
raise
- return r
+ return contents
+
+ try:
+ import codecs
+ except ImportError:
+ get_text_contents = get_contents
+ else:
+ # This attempts to figure out what the encoding of the text is
+ # based upon the BOM bytes, and then decodes the contents so that
+ # it's a valid python string.
+ def get_text_contents(self):
+ contents = self.get_contents()
+ if contents.startswith(codecs.BOM_UTF8):
+ contents = contents.decode('utf-8')
+ elif contents.startswith(codecs.BOM_UTF16):
+ contents = contents.decode('utf-16')
+ return contents
def get_content_hash(self):
"""
diff --git a/src/engine/SCons/Node/FSTests.py b/src/engine/SCons/Node/FSTests.py
index bf6a300..424aa5e 100644
--- a/src/engine/SCons/Node/FSTests.py
+++ b/src/engine/SCons/Node/FSTests.py
@@ -1192,6 +1192,18 @@ class FSTestCase(_tempdirTestCase):
f1 = fs.File(test.workpath("binary_file"))
assert f1.get_contents() == "Foo\x1aBar", f1.get_contents()
+ try:
+ # TODO(1.5)
+ eval('test_string = u"Foo\x1aBar"')
+ except SyntaxError:
+ pass
+ else:
+ # This tests to make sure we can decode UTF-8 text files.
+ test.write("utf8_file", test_string.encode('utf-8'))
+ f1 = fs.File(test.workpath("utf8_file"))
+ assert eval('f1.get_text_contents() == u"Foo\x1aBar"'), \
+ f1.get_text_contents()
+
def nonexistent(method, s):
try:
x = method(s, create = 0)
@@ -1257,18 +1269,44 @@ class FSTestCase(_tempdirTestCase):
finally:
test.unlink("file")
+ # test Entry.get_text_contents()
+ e = fs.Entry('does_not_exist')
+ c = e.get_text_contents()
+ assert c == "", c
+ assert e.__class__ == SCons.Node.FS.Entry
+
+ test.write("file", "file\n")
+ try:
+ e = fs.Entry('file')
+ c = e.get_text_contents()
+ assert c == "file\n", c
+ assert e.__class__ == SCons.Node.FS.File
+ finally:
+ test.unlink("file")
+
test.subdir("dir")
e = fs.Entry('dir')
c = e.get_contents()
assert c == "", c
assert e.__class__ == SCons.Node.FS.Dir
+ c = e.get_text_contents()
+ try:
+ eval('assert c == u"", c')
+ except SyntaxError:
+ assert c == ""
+
if hasattr(os, 'symlink'):
os.symlink('nonexistent', test.workpath('dangling_symlink'))
e = fs.Entry('dangling_symlink')
c = e.get_contents()
assert e.__class__ == SCons.Node.FS.Entry, e.__class__
assert c == "", c
+ c = e.get_text_contents()
+ try:
+ eval('assert c == u"", c')
+ except SyntaxError:
+ assert c == "", c
test.write("tstamp", "tstamp\n")
try:
@@ -1712,6 +1750,7 @@ class DirTestCase(_tempdirTestCase):
files = string.split(d.get_contents(), '\n')
assert e.get_contents() == '', e.get_contents()
+ assert e.get_text_contents() == '', e.get_text_contents()
assert e.get_csig()+" empty" == files[0], files
assert f.get_csig()+" f" == files[1], files
assert g.get_csig()+" g" == files[2], files
@@ -2758,6 +2797,48 @@ class RepositoryTestCase(_tempdirTestCase):
finally:
test.unlink(["rep3", "contents"])
+ def test_get_text_contents(self):
+ """Ensure get_text_contents() returns text contents from
+ Repositories"""
+ fs = self.fs
+ test = self.test
+
+ # Use a test string that has a file terminator in it to make
+ # sure we read the entire file, regardless of its contents.
+ try:
+ eval('test_string = u"Con\x1aTents\n"')
+ except SyntaxError:
+ import UserString
+ class FakeUnicodeString(UserString.UserString):
+ def encode(self, encoding):
+ return str(self)
+ test_string = FakeUnicodeString("Con\x1aTents\n")
+
+
+ # Test with ASCII.
+ test.write(["rep3", "contents"], test_string.encode('ascii'))
+ try:
+ c = fs.File("contents").get_text_contents()
+ assert test_string == c, "got %s" % repr(c)
+ finally:
+ test.unlink(["rep3", "contents"])
+
+ # Test with utf-8
+ test.write(["rep3", "contents"], test_string.encode('utf-8'))
+ try:
+ c = fs.File("contents").get_text_contents()
+ assert test_string == c, "got %s" % repr(c)
+ finally:
+ test.unlink(["rep3", "contents"])
+
+ # Test with utf-16
+ test.write(["rep3", "contents"], test_string.encode('utf-16'))
+ try:
+ c = fs.File("contents").get_text_contents()
+ assert test_string == c, "got %s" % repr(c)
+ finally:
+ test.unlink(["rep3", "contents"])
+
#def test_is_up_to_date(self):