diff options
author | Sebastien Barre <sebastien.barre@kitware.com> | 2005-07-28 17:21:03 (GMT) |
---|---|---|
committer | Sebastien Barre <sebastien.barre@kitware.com> | 2005-07-28 17:21:03 (GMT) |
commit | 6e4b6ca0d3547319887b07e60957553f19666429 (patch) | |
tree | dc27ce9b6c3a9604ae6dd80886da7b49f0eccb6f | |
parent | fac61306a2cd6a73ec185cc7869e2e165e2ed840 (diff) | |
download | CMake-6e4b6ca0d3547319887b07e60957553f19666429.zip CMake-6e4b6ca0d3547319887b07e60957553f19666429.tar.gz CMake-6e4b6ca0d3547319887b07e60957553f19666429.tar.bz2 |
ENH: add method to attempt to check if a file is text or binary
-rw-r--r-- | Source/kwsys/SystemTools.cxx | 59 | ||||
-rw-r--r-- | Source/kwsys/SystemTools.hxx.in | 22 |
2 files changed, 80 insertions, 1 deletions
diff --git a/Source/kwsys/SystemTools.cxx b/Source/kwsys/SystemTools.cxx
index a45ce40..a6d4e34 100644
--- a/Source/kwsys/SystemTools.cxx
+++ b/Source/kwsys/SystemTools.cxx
@@ -2748,6 +2748,65 @@ bool SystemTools::FileHasSignature(const char *filename,
   return res;
 }
 
+SystemTools::FileTypeEnum
+SystemTools::DetectFileType(const char *filename,
+                            unsigned long length,
+                            double percent_bin)
+{
+  // Classify 'filename' from its first 'length' bytes: binary if the ratio
+  // of non-textual bytes reaches 'percent_bin', unknown if nothing is read.
+
+  if (!filename || length == 0 || percent_bin < 0)
+    {
+    return SystemTools::FileTypeUnknown;
+    }
+
+  FILE *fp = fopen(filename, "rb");
+  if (!fp)
+    {
+    return SystemTools::FileTypeUnknown;
+    }
+
+  // Read at most 'length' bytes; the file is not needed afterwards
+
+  unsigned char *buffer = new unsigned char [length];
+  const size_t read_length = fread(buffer, 1, length, fp);
+  fclose(fp);
+
+  // Count textual bytes: printable ASCII [0x20, 0x7E] (DEL, 0x7F, is a
+  // control character and is excluded), plus \n, \r and \t
+
+  size_t text_count = 0;
+  for (size_t i = 0; i < read_length; ++i)
+    {
+    const unsigned char c = buffer[i];
+    if ((c >= 0x20 && c <= 0x7E) || c == '\n' || c == '\r' || c == '\t')
+      {
+      ++text_count;
+      }
+    }
+
+  // Release the buffer before any further return so that no path leaks it
+
+  delete [] buffer;
+
+  if (read_length == 0)
+    {
+    return SystemTools::FileTypeUnknown;
+    }
+
+  const double current_percent_bin =
+    static_cast<double>(read_length - text_count) /
+    static_cast<double>(read_length);
+
+  if (current_percent_bin >= percent_bin)
+    {
+    return SystemTools::FileTypeBinary;
+    }
+
+  return SystemTools::FileTypeText;
+}
+
 bool SystemTools::LocateFileInDir(const char *filename,
                                   const char *dir,
                                   kwsys_stl::string& filename_found,
diff --git a/Source/kwsys/SystemTools.hxx.in b/Source/kwsys/SystemTools.hxx.in
index 8654a67..31beafc 100644
--- a/Source/kwsys/SystemTools.hxx.in
+++ b/Source/kwsys/SystemTools.hxx.in
@@ -507,12 +507,32 @@ public:
   static bool FileIsSymlink(const char* name);
 
   /**
-   * return true if the file has a given signature (first set of bytes)
+   * Return true if the file has a given signature (first set of bytes)
    */
   static bool FileHasSignature(
     const char* filename, const char *signature, long offset = 0);
 
   /**
+   * Attempt to detect and return the type of a file.
+   * Up to 'length' bytes are read from the file; if the ratio of non-textual
+   * bytes is at least 'percent_bin' (a fraction such as 0.05, not a percentage),
+   * the file is considered binary, otherwise textual. Textual bytes are the
+   * ASCII range [0x20, 0x7E] plus \n, \r and \t. FileTypeUnknown is returned
+   * for invalid arguments, an unreadable file, or when no byte could be read.
+   * The algorithm is simplistic: no extension, 'magic' or unicode checks.
+   */
+  enum FileTypeEnum
+  {
+    FileTypeUnknown,
+    FileTypeBinary,
+    FileTypeText
+  };
+  static SystemTools::FileTypeEnum DetectFileType(
+    const char* filename,
+    unsigned long length = 256,
+    double percent_bin = 0.05);
+
+  /**
    * Try to locate the file 'filename' in the directory 'dir'.
    * If 'filename' is a fully qualified filename, the basename of the file is
    * used to check for its existence in 'dir'.