diff options
author | Guido van Rossum <guido@python.org> | 1997-01-30 02:44:20 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 1997-01-30 02:44:20 (GMT) |
commit | d7e4705d8fd581e6522967f53aa4e3af3fd62b83 (patch) | |
tree | 27f2148e2380f078faa30b686ddd6fd71a6ad998 | |
parent | fc167c6ba2b3aedb3e1e114bc9a2d1a793d5c2bd (diff) | |
download | cpython-d7e4705d8fd581e6522967f53aa4e3af3fd62b83.zip cpython-d7e4705d8fd581e6522967f53aa4e3af3fd62b83.tar.gz cpython-d7e4705d8fd581e6522967f53aa4e3af3fd62b83.tar.bz2 |
mime types guesser
-rw-r--r-- | Tools/webchecker/mimetypes.py | 190 |
1 files changed, 190 insertions, 0 deletions
"""Guess the MIME type of a file.

This module defines one useful function:

guess_type(url) -- guess the MIME type and encoding of a URL.

It also contains the following, for tuning the behavior:

Data:

knownfiles -- list of files to parse
inited -- flag set when init() has been called
suffix_map -- dictionary mapping suffixes to suffixes
encodings_map -- dictionary mapping suffixes to encodings
types_map -- dictionary mapping suffixes to types

Functions:

init([files]) -- parse a list of files, default knownfiles
read_mime_types(file) -- parse one file, return a dictionary or None

"""

import string  # no longer used below; kept so the module namespace is unchanged
import posixpath

# Files consulted by init() when it is called without an explicit list.
knownfiles = [
    "/usr/local/etc/httpd/conf/mime.types",
    "/usr/local/lib/netscape/mime.types",
    ]

# Set to 1 by init(); guess_type() calls init() lazily while this is false.
inited = 0


def guess_type(url):
    """Guess the type of a file based on its URL.

    Return value is a tuple (type, encoding) where type is None if the
    type can't be guessed (no or unknown suffix) or a string of the
    form type/subtype, usable for a MIME Content-type header; and
    encoding is None for no encoding or the name of the program used
    to encode (e.g. compress or gzip).  The mappings are table
    driven.  Encoding suffixes are case sensitive; type suffixes are
    first tried case sensitive, then case insensitive.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
    to ".tar.gz".  (This is table-driven too, using the dictionary
    suffix_map).

    """
    if not inited:
        init()
    base, ext = posixpath.splitext(url)
    # Expand shorthand suffixes (e.g. ".tgz" -> ".tar.gz") and re-split,
    # so the encoding suffix and the type suffix can be peeled off in turn.
    while ext in suffix_map:
        base, ext = posixpath.splitext(base + suffix_map[ext])
    if ext in encodings_map:
        encoding = encodings_map[ext]
        # The type suffix is whatever precedes the encoding suffix.
        base, ext = posixpath.splitext(base)
    else:
        encoding = None
    if ext in types_map:
        return types_map[ext], encoding
    # Fall back to a case-insensitive match for the type suffix only.
    lowered = ext.lower()
    if lowered in types_map:
        return types_map[lowered], encoding
    return None, encoding


def init(files=None):
    """Merge the suffix-to-type entries of each file into types_map.

    files is a list of file names; when omitted (None) the module-level
    knownfiles list is used.  An explicitly empty list parses nothing.
    Files that cannot be opened are skipped.  Entries read from a file
    override existing types_map entries for the same suffix.  Sets the
    module-level flag inited to 1.
    """
    global inited
    if files is None:
        files = knownfiles
    for filename in files:
        mapping = read_mime_types(filename)
        if mapping:
            types_map.update(mapping)
    inited = 1


def read_mime_types(file):
    """Parse one mime.types-style file.

    Each line has the form "type suffix suffix ..."; everything from the
    first whitespace-separated word that starts with '#' onwards is
    ignored.  Returns a dictionary mapping '.' + suffix to type, or
    None if the file cannot be opened.
    """
    try:
        f = open(file)
    except IOError:
        return None
    mapping = {}
    # The with-block guarantees the file is closed even if reading fails.
    with f:
        for line in f:
            words = line.split()
            # Drop the comment: the first word starting with '#' and
            # everything after it.
            for i, word in enumerate(words):
                if word.startswith('#'):
                    del words[i:]
                    break
            if not words:
                continue
            mime_type, suffixes = words[0], words[1:]
            for suff in suffixes:
                mapping['.' + suff] = mime_type
    return mapping


# Shorthand suffixes that stand for a type suffix plus an encoding suffix.
suffix_map = {
    '.tgz': '.tar.gz',
    '.taz': '.tar.gz',
    '.tz': '.tar.gz',
}

# Encoding suffixes (case sensitive) and the name of the encoding program.
encodings_map = {
    '.gz': 'gzip',
    '.Z': 'compress',
    }

# Built-in suffix-to-type table; init() adds entries read from files.
types_map = {
    '.a': 'application/octet-stream',
    '.ai': 'application/postscript',
    '.aif': 'audio/x-aiff',
    '.aifc': 'audio/x-aiff',
    '.aiff': 'audio/x-aiff',
    '.au': 'audio/basic',
    '.avi': 'video/x-msvideo',
    '.bcpio': 'application/x-bcpio',
    '.bin': 'application/octet-stream',
    '.cdf': 'application/x-netcdf',
    '.cpio': 'application/x-cpio',
    '.csh': 'application/x-csh',
    '.dll': 'application/octet-stream',
    '.dvi': 'application/x-dvi',
    '.exe': 'application/octet-stream',
    '.eps': 'application/postscript',
    '.etx': 'text/x-setext',
    '.gif': 'image/gif',
    '.gtar': 'application/x-gtar',
    '.hdf': 'application/x-hdf',
    '.htm': 'text/html',
    '.html': 'text/html',
    '.ief': 'image/ief',
    '.jpe': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.jpg': 'image/jpeg',
    '.latex': 'application/x-latex',
    '.man': 'application/x-troff-man',
    '.me': 'application/x-troff-me',
    '.mif': 'application/x-mif',
    '.mov': 'video/quicktime',
    '.movie': 'video/x-sgi-movie',
    '.mpe': 'video/mpeg',
    '.mpeg': 'video/mpeg',
    '.mpg': 'video/mpeg',
    '.ms': 'application/x-troff-ms',
    '.nc': 'application/x-netcdf',
    '.o': 'application/octet-stream',
    '.obj': 'application/octet-stream',
    '.oda': 'application/oda',
    '.pbm': 'image/x-portable-bitmap',
    '.pdf': 'application/pdf',
    '.pgm': 'image/x-portable-graymap',
    '.pnm': 'image/x-portable-anymap',
    '.png': 'image/png',
    '.ppm': 'image/x-portable-pixmap',
    '.py': 'text/x-python',
    '.pyc': 'application/x-python-code',
    '.ps': 'application/postscript',
    '.qt': 'video/quicktime',
    '.ras': 'image/x-cmu-raster',
    '.rgb': 'image/x-rgb',
    '.roff': 'application/x-troff',
    '.rtf': 'application/rtf',
    '.rtx': 'text/richtext',
    '.sgm': 'text/x-sgml',
    '.sgml': 'text/x-sgml',
    '.sh': 'application/x-sh',
    '.shar': 'application/x-shar',
    '.snd': 'audio/basic',
    '.so': 'application/octet-stream',
    '.src': 'application/x-wais-source',
    '.sv4cpio': 'application/x-sv4cpio',
    '.sv4crc': 'application/x-sv4crc',
    '.t': 'application/x-troff',
    '.tar': 'application/x-tar',
    '.tcl': 'application/x-tcl',
    '.tex': 'application/x-tex',
    '.texi': 'application/x-texinfo',
    '.texinfo': 'application/x-texinfo',
    '.tif': 'image/tiff',
    '.tiff': 'image/tiff',
    '.tr': 'application/x-troff',
    '.tsv': 'text/tab-separated-values',
    '.txt': 'text/plain',
    '.ustar': 'application/x-ustar',
    '.wav': 'audio/x-wav',
    '.xbm': 'image/x-xbitmap',
    '.xpm': 'image/x-xpixmap',
    '.xwd': 'image/x-xwindowdump',
    '.zip': 'application/zip',
    }