summary refs log tree commit diff stats
diff options
context:
space:
mode:
author: Sebastien Barre <sebastien.barre@kitware.com> 2005-07-28 17:21:03 (GMT)
committer: Sebastien Barre <sebastien.barre@kitware.com> 2005-07-28 17:21:03 (GMT)
commit: 6e4b6ca0d3547319887b07e60957553f19666429 (patch)
tree: dc27ce9b6c3a9604ae6dd80886da7b49f0eccb6f
parent: fac61306a2cd6a73ec185cc7869e2e165e2ed840 (diff)
download: CMake-6e4b6ca0d3547319887b07e60957553f19666429.zip
CMake-6e4b6ca0d3547319887b07e60957553f19666429.tar.gz
CMake-6e4b6ca0d3547319887b07e60957553f19666429.tar.bz2
ENH: add method to attempt to check if a file is text or binary
-rw-r--r-- Source/kwsys/SystemTools.cxx 59
-rw-r--r-- Source/kwsys/SystemTools.hxx.in 22
2 files changed, 80 insertions, 1 deletions
diff --git a/Source/kwsys/SystemTools.cxx b/Source/kwsys/SystemTools.cxx
index a45ce40..a6d4e34 100644
--- a/Source/kwsys/SystemTools.cxx
+++ b/Source/kwsys/SystemTools.cxx
@@ -2748,6 +2748,65 @@ bool SystemTools::FileHasSignature(const char *filename,
return res;
}
+// Classify a file as text or binary by sampling up to 'length' leading
+// bytes. Returns FileTypeUnknown if an argument is invalid, the file cannot
+// be opened, or no bytes could be read.
+SystemTools::FileTypeEnum
+SystemTools::DetectFileType(const char *filename,
+                            unsigned long length,
+                            double percent_bin)
+{
+  if (!filename || percent_bin < 0)
+    {
+    return SystemTools::FileTypeUnknown;
+    }
+
+  FILE *fp = fopen(filename, "rb");
+  if (!fp)
+    {
+    return SystemTools::FileTypeUnknown;
+    }
+
+  // Allocate buffer and read bytes
+  unsigned char *buffer = new unsigned char [length];
+  size_t read_length = fread(buffer, 1, length, fp);
+  fclose(fp);
+  if (read_length == 0)
+    {
+    delete [] buffer; // every exit past the allocation must free the buffer
+    return SystemTools::FileTypeUnknown;
+    }
+
+  // Count the bytes considered textual: [0x20, 0x7F] plus \n, \r, \t
+  size_t text_count = 0;
+  const unsigned char *ptr = buffer;
+  const unsigned char *buffer_end = buffer + read_length;
+  while (ptr != buffer_end)
+    {
+    if ((*ptr >= 0x20 && *ptr <= 0x7F) ||
+        *ptr == '\n' ||
+        *ptr == '\r' ||
+        *ptr == '\t')
+      {
+      text_count++;
+      }
+    ptr++;
+    }
+
+  delete [] buffer;
+
+  // Fraction (0 to 1) of the sampled bytes that are not textual
+  double current_percent_bin =
+    ((double)(read_length - text_count) / (double)read_length);
+
+  if (current_percent_bin >= percent_bin)
+    {
+    return SystemTools::FileTypeBinary;
+    }
+
+  return SystemTools::FileTypeText;
+}
+
bool SystemTools::LocateFileInDir(const char *filename,
const char *dir,
kwsys_stl::string& filename_found,
diff --git a/Source/kwsys/SystemTools.hxx.in b/Source/kwsys/SystemTools.hxx.in
index 8654a67..31beafc 100644
--- a/Source/kwsys/SystemTools.hxx.in
+++ b/Source/kwsys/SystemTools.hxx.in
@@ -507,12 +507,32 @@ public:
static bool FileIsSymlink(const char* name);
/**
- * return true if the file has a given signature (first set of bytes)
+ * Return true if the file has a given signature (first set of bytes)
*/
static bool FileHasSignature(
const char* filename, const char *signature, long offset = 0);
 /**
+ * Attempt to detect and return the type of a file (FileTypeUnknown if it
+ * cannot be opened or read). Up to 'length' bytes are read; if the fraction
+ * of non-textual bytes is at least 'percent_bin' (0.05 means 5%), the file
+ * is considered binary, otherwise textual. Textual elements are bytes in
+ * the ASCII [0x20, 0x7F] range, but also \n, \r, \t.
+ * The algorithm is simplistic, and should probably check for usual file
+ * extensions, 'magic' signature, unicode, etc.
+ */
+ enum FileTypeEnum
+ {
+ FileTypeUnknown,
+ FileTypeBinary,
+ FileTypeText
+ };
+ static SystemTools::FileTypeEnum DetectFileType(
+ const char* filename,
+ unsigned long length = 256,
+ double percent_bin = 0.05);
+
+ /**
* Try to locate the file 'filename' in the directory 'dir'.
* If 'filename' is a fully qualified filename, the basename of the file is
* used to check for its existence in 'dir'.