summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGuido van Rossum <guido@python.org>1997-01-30 02:44:20 (GMT)
committerGuido van Rossum <guido@python.org>1997-01-30 02:44:20 (GMT)
commitd7e4705d8fd581e6522967f53aa4e3af3fd62b83 (patch)
tree27f2148e2380f078faa30b686ddd6fd71a6ad998
parentfc167c6ba2b3aedb3e1e114bc9a2d1a793d5c2bd (diff)
downloadcpython-d7e4705d8fd581e6522967f53aa4e3af3fd62b83.zip
cpython-d7e4705d8fd581e6522967f53aa4e3af3fd62b83.tar.gz
cpython-d7e4705d8fd581e6522967f53aa4e3af3fd62b83.tar.bz2
mime types guesser
-rw-r--r--Tools/webchecker/mimetypes.py190
1 files changed, 190 insertions, 0 deletions
diff --git a/Tools/webchecker/mimetypes.py b/Tools/webchecker/mimetypes.py
new file mode 100644
index 0000000..fd0e1c5
--- /dev/null
+++ b/Tools/webchecker/mimetypes.py
@@ -0,0 +1,190 @@
+"""Guess the MIME type of a file.
+
+This module defines one useful function:
+
+guess_type(url) -- guess the MIME type and encoding of a URL.
+
+It also contains the following, for tuning the behavior:
+
+Data:
+
+knownfiles -- list of files to parse
+inited -- flag set when init() has been called
+suffix_map -- dictionary mapping shorthand suffixes to the suffixes they stand for
+encodings_map -- dictionary mapping suffixes to encodings
+types_map -- dictionary mapping suffixes to types
+
+Functions:
+
+init([files]) -- parse a list of files, default knownfiles
+read_mime_types(file) -- parse one file, return a dictionary or None
+
+"""
+
+import string
+import posixpath
+
# Flag: 0 until init() has run, then 1.  guess_type() checks it so the
# tables are loaded on first use.
inited = 0

# mime.types files that init() parses when it is not given an explicit
# list of files.
knownfiles = [
    "/usr/local/etc/httpd/conf/mime.types",
    "/usr/local/lib/netscape/mime.types",
]
+
def guess_type(url):
    """Guess the type of a file based on its URL.

    Return value is a tuple (type, encoding) where type is None if the
    type can't be guessed (no or unknown suffix) or a string of the
    form type/subtype, usable for a MIME Content-type header; and
    encoding is None for no encoding or the name of the program used
    to encode (e.g. compress or gzip).  The mappings are table
    driven.  Encoding suffixes are case sensitive; type suffixes are
    first tried case sensitive, then case insensitive.

    The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
    to ".tar.gz".  (This is table-driven too, using the dictionary
    suffix_map.)

    If init() has not been called yet, it is called here with its
    default arguments before any lookup is done.
    """
    if not inited:
        init()
    base, ext = posixpath.splitext(url)
    # Expand shorthand suffixes (e.g. ".tgz" -> ".tar.gz") and re-split, so
    # the encoding suffix and the type suffix can be looked up separately.
    while ext in suffix_map:
        base, ext = posixpath.splitext(base + suffix_map[ext])
    # Peel off an encoding suffix, if any; the type is then decided by the
    # suffix that precedes it.
    if ext in encodings_map:
        encoding = encodings_map[ext]
        base, ext = posixpath.splitext(base)
    else:
        encoding = None
    # Exact (case sensitive) match wins; fall back to the lowercased suffix.
    if ext in types_map:
        return types_map[ext], encoding
    lowered = ext.lower()
    if lowered in types_map:
        return types_map[lowered], encoding
    return None, encoding
+
def init(files=None):
    """Load MIME type definitions from files into types_map.

    files is a list of file names to parse; when it is omitted (or
    empty) the module-level knownfiles list is used instead.  Each file
    is parsed with read_mime_types(); files that cannot be opened or
    that yield no entries are skipped.  Entries read from a file
    replace existing entries in types_map for the same suffix.

    The module-level flag inited is set to 1 once all files have been
    processed.
    """
    global inited
    for filename in files or knownfiles:
        parsed = read_mime_types(filename)
        if parsed:
            types_map.update(parsed)
    inited = 1
+
def read_mime_types(file):
    """Parse one mime.types-style file.

    file is the name of the file to read.  Each line has the form

        type/subtype suffix1 suffix2 ...

    A word starting with '#' begins a comment that runs to the end of
    the line.  Blank lines, comment-only lines and lines that name a
    type without any suffix contribute nothing.

    Return a dictionary mapping each suffix, with a leading '.' added,
    to its type, or None if the file cannot be opened.  Errors raised
    while reading an opened file are not caught, but the file is
    always closed.
    """
    try:
        f = open(file)
    except IOError:
        return None
    mapping = {}
    # The context manager closes the file even if reading or parsing raises.
    with f:
        for line in f:
            words = line.split()
            # Drop everything from the first word that starts a comment.
            for i, word in enumerate(words):
                if word.startswith('#'):
                    del words[i:]
                    break
            if not words:
                continue
            mime_type, suffixes = words[0], words[1:]
            for suff in suffixes:
                mapping['.' + suff] = mime_type
    return mapping
+
# Shorthand suffixes that guess_type() expands before looking anything up.
# Every entry stands for a gzip-compressed tar archive.
suffix_map = dict.fromkeys(('.tgz', '.taz', '.tz'), '.tar.gz')
+
# Suffixes that name an encoding layer rather than a content type; the value
# is the name of the program used to encode.  Keys are case sensitive
# ('.Z' is listed, '.z' is not).
encodings_map = {}
encodings_map['.gz'] = 'gzip'
encodings_map['.Z'] = 'compress'
+
# Built-in table mapping a file suffix (including the leading dot) to a MIME
# type of the form type/subtype.  init() adds to and overrides these entries
# with whatever it reads from mime.types files.
types_map = {
    ".a": "application/octet-stream",
    ".ai": "application/postscript",
    ".aif": "audio/x-aiff",
    ".aifc": "audio/x-aiff",
    ".aiff": "audio/x-aiff",
    ".au": "audio/basic",
    ".avi": "video/x-msvideo",
    ".bcpio": "application/x-bcpio",
    ".bin": "application/octet-stream",
    ".cdf": "application/x-netcdf",
    ".cpio": "application/x-cpio",
    ".csh": "application/x-csh",
    ".dll": "application/octet-stream",
    ".dvi": "application/x-dvi",
    ".exe": "application/octet-stream",
    ".eps": "application/postscript",
    ".etx": "text/x-setext",
    ".gif": "image/gif",
    ".gtar": "application/x-gtar",
    ".hdf": "application/x-hdf",
    ".htm": "text/html",
    ".html": "text/html",
    ".ief": "image/ief",
    ".jpe": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".jpg": "image/jpeg",
    ".latex": "application/x-latex",
    ".man": "application/x-troff-man",
    ".me": "application/x-troff-me",
    ".mif": "application/x-mif",
    ".mov": "video/quicktime",
    ".movie": "video/x-sgi-movie",
    ".mpe": "video/mpeg",
    ".mpeg": "video/mpeg",
    ".mpg": "video/mpeg",
    ".ms": "application/x-troff-ms",
    ".nc": "application/x-netcdf",
    ".o": "application/octet-stream",
    ".obj": "application/octet-stream",
    ".oda": "application/oda",
    ".pbm": "image/x-portable-bitmap",
    ".pdf": "application/pdf",
    ".pgm": "image/x-portable-graymap",
    ".pnm": "image/x-portable-anymap",
    ".png": "image/png",
    ".ppm": "image/x-portable-pixmap",
    ".py": "text/x-python",
    ".pyc": "application/x-python-code",
    ".ps": "application/postscript",
    ".qt": "video/quicktime",
    ".ras": "image/x-cmu-raster",
    ".rgb": "image/x-rgb",
    ".roff": "application/x-troff",
    ".rtf": "application/rtf",
    ".rtx": "text/richtext",
    ".sgm": "text/x-sgml",
    ".sgml": "text/x-sgml",
    ".sh": "application/x-sh",
    ".shar": "application/x-shar",
    ".snd": "audio/basic",
    ".so": "application/octet-stream",
    ".src": "application/x-wais-source",
    ".sv4cpio": "application/x-sv4cpio",
    ".sv4crc": "application/x-sv4crc",
    ".t": "application/x-troff",
    ".tar": "application/x-tar",
    ".tcl": "application/x-tcl",
    ".tex": "application/x-tex",
    ".texi": "application/x-texinfo",
    ".texinfo": "application/x-texinfo",
    ".tif": "image/tiff",
    ".tiff": "image/tiff",
    ".tr": "application/x-troff",
    ".tsv": "text/tab-separated-values",
    ".txt": "text/plain",
    ".ustar": "application/x-ustar",
    ".wav": "audio/x-wav",
    ".xbm": "image/x-xbitmap",
    ".xpm": "image/x-xpixmap",
    ".xwd": "image/x-xwindowdump",
    ".zip": "application/zip",
}