diff options
author | Steven Knight <knight@baldmt.com> | 2008-12-12 06:16:31 (GMT) |
---|---|---|
committer | Steven Knight <knight@baldmt.com> | 2008-12-12 06:16:31 (GMT) |
commit | 6554d8631debd788d6bad226d098daee080ca20e (patch) | |
tree | f468f63915d4ba1b0ea3cc2d064d5ce922cfe6e4 /src/engine/SCons/Node | |
parent | 8e1e691178fb24207d0e073a8c67bd8810211396 (diff) | |
download | SCons-6554d8631debd788d6bad226d098daee080ca20e.zip SCons-6554d8631debd788d6bad226d098daee080ca20e.tar.gz SCons-6554d8631debd788d6bad226d098daee080ca20e.tar.bz2 |
Issue 2255: Handle scanning of UTF-8 and UTF-16 files. (Greg Spencer)
Diffstat (limited to 'src/engine/SCons/Node')
-rw-r--r-- | src/engine/SCons/Node/FS.py | 61 | ||||
-rw-r--r-- | src/engine/SCons/Node/FSTests.py | 81 |
2 files changed, 132 insertions, 10 deletions
diff --git a/src/engine/SCons/Node/FS.py b/src/engine/SCons/Node/FS.py index 98efc7a..bdc1bfd 100644 --- a/src/engine/SCons/Node/FS.py +++ b/src/engine/SCons/Node/FS.py @@ -35,8 +35,9 @@ that can be used by scripts or modules looking for the canonical default. __revision__ = "__FILE__ __REVISION__ __DATE__ __DEVELOPER__" -import fnmatch from itertools import izip +import cStringIO +import fnmatch import os import os.path import re @@ -45,7 +46,11 @@ import stat import string import sys import time -import cStringIO + +try: + import codecs +except ImportError: + pass import SCons.Action from SCons.Debug import logInstanceCreation @@ -876,11 +881,8 @@ class Entry(Base): return self.get_suffix() def get_contents(self): - """Fetch the contents of the entry. - - Since this should return the real contents from the file - system, we check to see into what sort of subclass we should - morph this Entry.""" + """Fetch the contents of the entry. Returns the exact binary + contents of the file.""" try: self = self.disambiguate(must_exist=1) except SCons.Errors.UserError: @@ -893,6 +895,24 @@ class Entry(Base): else: return self.get_contents() + def get_text_contents(self): + """Fetch the decoded text contents of a Unicode encoded Entry. + + Since this should return the text contents from the file + system, we check to see into what sort of subclass we should + morph this Entry.""" + try: + self = self.disambiguate(must_exist=1) + except SCons.Errors.UserError: + # There was nothing on disk with which to disambiguate + # this entry. Leave it as an Entry, but return a null + # string so calls to get_text_contents() in emitters and + # the like (e.g. in qt.py) don't have to disambiguate by + # hand or catch the exception. + return '' + else: + return self.get_text_contents() + def must_be_same(self, klass): """Called to make sure a Node is a Dir. Since we're an Entry, we can morph into one.""" @@ -1598,13 +1618,18 @@ class Dir(Base): """A directory does not get scanned.""" return None + def get_text_contents(self): + """We already emit things in text, so just return the binary + version.""" + return self.get_contents() + def get_contents(self): """Return content signatures and names of all our children separated by new-lines. Ensure that the nodes are sorted.""" contents = [] name_cmp = lambda a, b: cmp(a.name, b.name) sorted_children = self.children()[:] - sorted_children.sort(name_cmp) + sorted_children.sort(name_cmp) for node in sorted_children: contents.append('%s %s\n' % (node.get_csig(), node.name)) return string.join(contents, '') @@ -2236,12 +2261,28 @@ class File(Base): return '' fname = self.rfile().abspath try: - r = open(fname, "rb").read() + contents = open(fname, "rb").read() except EnvironmentError, e: if not e.filename: e.filename = fname raise - return r + return contents + + try: + import codecs + except ImportError: + get_text_contents = get_contents + else: + # This attempts to figure out what the encoding of the text is + # based upon the BOM bytes, and then decodes the contents so that + # it's a valid python string. + def get_text_contents(self): + contents = self.get_contents() + if contents.startswith(codecs.BOM_UTF8): + contents = contents.decode('utf-8') + elif contents.startswith(codecs.BOM_UTF16): + contents = contents.decode('utf-16') + return contents def get_content_hash(self): """ diff --git a/src/engine/SCons/Node/FSTests.py b/src/engine/SCons/Node/FSTests.py index bf6a300..424aa5e 100644 --- a/src/engine/SCons/Node/FSTests.py +++ b/src/engine/SCons/Node/FSTests.py @@ -1192,6 +1192,18 @@ class FSTestCase(_tempdirTestCase): f1 = fs.File(test.workpath("binary_file")) assert f1.get_contents() == "Foo\x1aBar", f1.get_contents() + try: + # TODO(1.5) + eval('test_string = u"Foo\x1aBar"') + except SyntaxError: + pass + else: + # This tests to make sure we can decode UTF-8 text files. + test.write("utf8_file", test_string.encode('utf-8')) + f1 = fs.File(test.workpath("utf8_file")) + assert eval('f1.get_text_contents() == u"Foo\x1aBar"'), \ + f1.get_text_contents() + def nonexistent(method, s): try: x = method(s, create = 0) @@ -1257,18 +1269,44 @@ class FSTestCase(_tempdirTestCase): finally: test.unlink("file") + # test Entry.get_text_contents() + e = fs.Entry('does_not_exist') + c = e.get_text_contents() + assert c == "", c + assert e.__class__ == SCons.Node.FS.Entry + + test.write("file", "file\n") + try: + e = fs.Entry('file') + c = e.get_text_contents() + assert c == "file\n", c + assert e.__class__ == SCons.Node.FS.File + finally: + test.unlink("file") + test.subdir("dir") e = fs.Entry('dir') c = e.get_contents() assert c == "", c assert e.__class__ == SCons.Node.FS.Dir + c = e.get_text_contents() + try: + eval('assert c == u"", c') + except SyntaxError: + assert c == "" + if hasattr(os, 'symlink'): os.symlink('nonexistent', test.workpath('dangling_symlink')) e = fs.Entry('dangling_symlink') c = e.get_contents() assert e.__class__ == SCons.Node.FS.Entry, e.__class__ assert c == "", c + c = e.get_text_contents() + try: + eval('assert c == u"", c') + except SyntaxError: + assert c == "", c test.write("tstamp", "tstamp\n") try: @@ -1712,6 +1750,7 @@ class DirTestCase(_tempdirTestCase): files = string.split(d.get_contents(), '\n') assert e.get_contents() == '', e.get_contents() + assert e.get_text_contents() == '', e.get_text_contents() assert e.get_csig()+" empty" == files[0], files assert f.get_csig()+" f" == files[1], files assert g.get_csig()+" g" == files[2], files @@ -2758,6 +2797,48 @@ class RepositoryTestCase(_tempdirTestCase): finally: test.unlink(["rep3", "contents"]) + def test_get_text_contents(self): + """Ensure get_text_contents() returns text contents from + Repositories""" + fs = self.fs + test = self.test + + # Use a test string that has a file terminator in it to make + # sure we read the entire file, regardless of its contents. + try: + eval('test_string = u"Con\x1aTents\n"') + except SyntaxError: + import UserString + class FakeUnicodeString(UserString.UserString): + def encode(self, encoding): + return str(self) + test_string = FakeUnicodeString("Con\x1aTents\n") + + + # Test with ASCII. + test.write(["rep3", "contents"], test_string.encode('ascii')) + try: + c = fs.File("contents").get_text_contents() + assert test_string == c, "got %s" % repr(c) + finally: + test.unlink(["rep3", "contents"]) + + # Test with utf-8 + test.write(["rep3", "contents"], test_string.encode('utf-8')) + try: + c = fs.File("contents").get_text_contents() + assert test_string == c, "got %s" % repr(c) + finally: + test.unlink(["rep3", "contents"]) + + # Test with utf-16 + test.write(["rep3", "contents"], test_string.encode('utf-16')) + try: + c = fs.File("contents").get_text_contents() + assert test_string == c, "got %s" % repr(c) + finally: + test.unlink(["rep3", "contents"]) + #def test_is_up_to_date(self): |