summaryrefslogtreecommitdiffstats
path: root/src/engine/SCons/Node/FS.py
diff options
context:
space:
mode:
authorSteven Knight <knight@baldmt.com>2008-12-12 06:16:31 (GMT)
committerSteven Knight <knight@baldmt.com>2008-12-12 06:16:31 (GMT)
commit6554d8631debd788d6bad226d098daee080ca20e (patch)
treef468f63915d4ba1b0ea3cc2d064d5ce922cfe6e4 /src/engine/SCons/Node/FS.py
parent8e1e691178fb24207d0e073a8c67bd8810211396 (diff)
downloadSCons-6554d8631debd788d6bad226d098daee080ca20e.zip
SCons-6554d8631debd788d6bad226d098daee080ca20e.tar.gz
SCons-6554d8631debd788d6bad226d098daee080ca20e.tar.bz2
Issue 2255: Handle scanning of UTF-8 and UTF-16 files. (Greg Spencer)
Diffstat (limited to 'src/engine/SCons/Node/FS.py')
-rw-r--r--src/engine/SCons/Node/FS.py61
1 files changed, 51 insertions, 10 deletions
diff --git a/src/engine/SCons/Node/FS.py b/src/engine/SCons/Node/FS.py
index 98efc7a..bdc1bfd 100644
--- a/src/engine/SCons/Node/FS.py
+++ b/src/engine/SCons/Node/FS.py
@@ -35,8 +35,9 @@ that can be used by scripts or modules looking for the canonical default.
__revision__ = "__FILE__ __REVISION__ __DATE__ __DEVELOPER__"
-import fnmatch
from itertools import izip
+import cStringIO
+import fnmatch
import os
import os.path
import re
@@ -45,7 +46,11 @@ import stat
import string
import sys
import time
-import cStringIO
+
+try:
+ import codecs
+except ImportError:
+ pass
import SCons.Action
from SCons.Debug import logInstanceCreation
@@ -876,11 +881,8 @@ class Entry(Base):
return self.get_suffix()
def get_contents(self):
- """Fetch the contents of the entry.
-
- Since this should return the real contents from the file
- system, we check to see into what sort of subclass we should
- morph this Entry."""
+ """Fetch the contents of the entry. Returns the exact binary
+ contents of the file."""
try:
self = self.disambiguate(must_exist=1)
except SCons.Errors.UserError:
@@ -893,6 +895,24 @@ class Entry(Base):
else:
return self.get_contents()
+ def get_text_contents(self):
+ """Fetch the text contents of this Entry.
+
+ The Entry is first disambiguated with must_exist=1, and the
+ request is then forwarded to the get_text_contents() method of
+ whatever disambiguate() returned. If disambiguate() raises
+ SCons.Errors.UserError, an empty string is returned instead of
+ propagating the error."""
+ try:
+ self = self.disambiguate(must_exist=1)
+ except SCons.Errors.UserError:
+ # There was nothing on disk with which to disambiguate
+ # this entry. Leave it as an Entry, but return a null
+ # string so calls to get_text_contents() in emitters and
+ # the like (e.g. in qt.py) don't have to disambiguate by
+ # hand or catch the exception.
+ return ''
+ else:
+ # self has been rebound to the result of disambiguate(), so
+ # this call dispatches on that object.
+ return self.get_text_contents()
+
def must_be_same(self, klass):
"""Called to make sure a Node is a Dir. Since we're an
Entry, we can morph into one."""
@@ -1598,13 +1618,18 @@ class Dir(Base):
"""A directory does not get scanned."""
return None
+ def get_text_contents(self):
+ """Return the text contents of this directory.
+
+ This simply delegates to get_contents(): a directory's
+ contents are already assembled as text, so there is no
+ separate decoded form to produce."""
+ return self.get_contents()
+
def get_contents(self):
"""Return content signatures and names of all our children
separated by new-lines. Ensure that the nodes are sorted."""
contents = []
name_cmp = lambda a, b: cmp(a.name, b.name)
sorted_children = self.children()[:]
- sorted_children.sort(name_cmp)
+ sorted_children.sort(name_cmp)
for node in sorted_children:
contents.append('%s %s\n' % (node.get_csig(), node.name))
return string.join(contents, '')
@@ -2236,12 +2261,28 @@ class File(Base):
return ''
fname = self.rfile().abspath
try:
- r = open(fname, "rb").read()
+ contents = open(fname, "rb").read()
except EnvironmentError, e:
if not e.filename:
e.filename = fname
raise
- return r
+ return contents
+
+    try:
+        import codecs
+    except ImportError:
+        # No codecs module to decode with: hand back the raw contents.
+        get_text_contents = get_contents
+    else:
+        def get_text_contents(self):
+            """Return the file's contents, decoded if they start with a BOM.
+
+            A leading UTF-8 or UTF-16 (either byte order) byte-order
+            mark selects the codec, and the mark itself is not part of
+            the decoded result.  Contents without a recognized BOM are
+            returned unchanged, exactly as get_contents() provides
+            them."""
+            contents = self.get_contents()
+            if contents.startswith(codecs.BOM_UTF8):
+                # The 'utf-8' codec would decode the BOM into a leading
+                # U+FEFF character, so slice it off before decoding.
+                return contents[len(codecs.BOM_UTF8):].decode('utf-8')
+            # codecs.BOM_UTF16 only names the native byte order, so
+            # test both orders explicitly.  The 'utf-16' codec reads
+            # the BOM to pick the byte order and consumes it.
+            if (contents.startswith(codecs.BOM_UTF16_LE) or
+                contents.startswith(codecs.BOM_UTF16_BE)):
+                return contents.decode('utf-16')
+            return contents
def get_content_hash(self):
"""