summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- Doc/lib/libmimetypes.tex 55
-rw-r--r-- Lib/mimetypes.py 344
-rw-r--r-- Lib/test/test_mimetypes.py 12
3 files changed, 263 insertions, 148 deletions
diff --git a/Doc/lib/libmimetypes.tex b/Doc/lib/libmimetypes.tex
index 327b2ba..3747fe1 100644
--- a/Doc/lib/libmimetypes.tex
+++ b/Doc/lib/libmimetypes.tex
@@ -8,10 +8,10 @@
\indexii{MIME}{content type}
-The \module{mimetypes} converts between a filename or URL and the MIME
-type associated with the filename extension. Conversions are provided
-from filename to MIME type and from MIME type to filename extension;
-encodings are not supported for the later conversion.
+The \module{mimetypes} module converts between a filename or URL and
+the MIME type associated with the filename extension. Conversions are
+provided from filename to MIME type and from MIME type to filename
+extension; encodings are not supported for the latter conversion.
The module provides one class and a number of convenience functions.
The functions are the normal interface to this module, but some
@@ -23,22 +23,31 @@ module. If the module has not been initialized, they will call
sets up.
-\begin{funcdesc}{guess_type}{filename}
+\begin{funcdesc}{guess_type}{filename\optional{, strict}}
Guess the type of a file based on its filename or URL, given by
\var{filename}. The return value is a tuple \code{(\var{type},
\var{encoding})} where \var{type} is \code{None} if the type can't be
-guessed (no or unknown suffix) or a string of the form
+guessed (missing or unknown suffix) or a string of the form
\code{'\var{type}/\var{subtype}'}, usable for a MIME
-\mailheader{content-type} header\indexii{MIME}{headers}; and encoding
-is \code{None} for no encoding or the name of the program used to
-encode (e.g. \program{compress} or \program{gzip}). The encoding is
-suitable for use as a \mailheader{Content-Encoding} header, \emph{not}
-as a \mailheader{Content-Transfer-Encoding} header. The mappings are
-table driven. Encoding suffixes are case sensitive; type suffixes are
-first tried case sensitive, then case insensitive.
+\mailheader{content-type} header\indexii{MIME}{headers}.
+
+\var{encoding} is \code{None} for no encoding or the name of the
+program used to encode (e.g. \program{compress} or \program{gzip}).
+The encoding is suitable for use as a \mailheader{Content-Encoding}
+header, \emph{not} as a \mailheader{Content-Transfer-Encoding} header.
+The mappings are table driven. Encoding suffixes are case sensitive;
+type suffixes are first tried case sensitively, then case
+insensitively.
+
+Optional \var{strict} is a flag specifying whether the list of known
+MIME types is limited to only the official types \ulink{registered
+with IANA}{http://www.isi.edu/in-notes/iana/assignments/media-types}.
+When \var{strict} is true (the default), only the IANA types are
+supported; when \var{strict} is false, some additional non-standard
+but commonly used MIME types are also recognized.
\end{funcdesc}
-\begin{funcdesc}{guess_extension}{type}
+\begin{funcdesc}{guess_extension}{type\optional{, strict}}
Guess the extension for a file based on its MIME type, given by
\var{type}.
The return value is a string giving a filename extension, including the
@@ -46,6 +55,9 @@ leading dot (\character{.}). The extension is not guaranteed to have been
associated with any particular data stream, but would be mapped to the
MIME type \var{type} by \function{guess_type()}. If no extension can
be guessed for \var{type}, \code{None} is returned.
+
+Optional \var{strict} has the same meaning as with the
+\function{guess_type()} function.
\end{funcdesc}
@@ -98,6 +110,11 @@ Dictionary mapping filename extensions to encoding types.
Dictionary mapping filename extensions to MIME types.
\end{datadesc}
+\begin{datadesc}{common_types}
+Dictionary mapping filename extensions to non-standard, but commonly
+found MIME types.
+\end{datadesc}
+
The \class{MimeTypes} class may be useful for applications which may
want more than one MIME-type database:
@@ -144,12 +161,18 @@ that of the \refmodule{mimetypes} module.
module.
\end{datadesc}
-\begin{methoddesc}{guess_extension}{type}
+\begin{datadesc}{common_types}
+ Dictionary mapping filename extensions to non-standard, but commonly
+ found MIME types. This is initially a copy of the global
+ \code{common_types} defined in the module.
+\end{datadesc}
+
+\begin{methoddesc}{guess_extension}{type\optional{, strict}}
Similar to the \function{guess_extension()} function, using the
tables stored as part of the object.
\end{methoddesc}
-\begin{methoddesc}{guess_type}{url}
+\begin{methoddesc}{guess_type}{url\optional{, strict}}
Similar to the \function{guess_type()} function, using the tables
stored as part of the object.
\end{methoddesc}
diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py
index 06b450b..1cd424a 100644
--- a/Lib/mimetypes.py
+++ b/Lib/mimetypes.py
@@ -2,9 +2,9 @@
This module defines two useful functions:
-guess_type(url) -- guess the MIME type and encoding of a URL.
+guess_type(url, strict=1) -- guess the MIME type and encoding of a URL.
-guess_extension(type) -- guess the extension for a given MIME type.
+guess_extension(type, strict=1) -- guess the extension for a given MIME type.
It also contains the following, for tuning the behavior:
@@ -21,6 +21,16 @@ Functions:
init([files]) -- parse a list of files, default knownfiles
read_mime_types(file) -- parse one file, return a dictionary or None
+When run as a script, the following command line options are recognized:
+
+Usage: mimetypes.py [options] type
+Options:
+ --help / -h -- print this message and exit
+ --lenient / -l -- additionally search some common, but non-standard
+ types.
+ --extension / -e -- guess extension instead of type
+
+More than one type argument may be given.
"""
import os
@@ -53,10 +63,11 @@ class MimeTypes:
self.encodings_map = encodings_map.copy()
self.suffix_map = suffix_map.copy()
self.types_map = types_map.copy()
+ self.common_types = common_types.copy()
for name in filenames:
self.read(name)
- def guess_type(self, url):
+ def guess_type(self, url, strict=1):
"""Guess the type of a file based on its URL.
Return value is a tuple (type, encoding) where type is None if
@@ -71,6 +82,9 @@ class MimeTypes:
The suffixes .tgz, .taz and .tz (case sensitive!) are all
mapped to '.tar.gz'. (This is table-driven too, using the
dictionary suffix_map.)
+
+ Optional `strict' argument when false adds a bunch of commonly found,
+ but non-standard types.
"""
scheme, url = urllib.splittype(url)
if scheme == 'data':
@@ -101,14 +115,21 @@ class MimeTypes:
else:
encoding = None
types_map = self.types_map
+ common_types = self.common_types
if types_map.has_key(ext):
return types_map[ext], encoding
elif types_map.has_key(ext.lower()):
return types_map[ext.lower()], encoding
+ elif strict:
+ return None, encoding
+ elif common_types.has_key(ext):
+ return common_types[ext], encoding
+ elif common_types.has_key(ext.lower()):
+ return common_types[ext.lower()], encoding
else:
return None, encoding
- def guess_extension(self, type):
+ def guess_extension(self, type, strict=1):
"""Guess the extension for a file based on its MIME type.
Return value is a string giving a filename extension,
@@ -117,11 +138,18 @@ class MimeTypes:
stream, but would be mapped to the MIME type `type' by
guess_type(). If no extension can be guessed for `type', None
is returned.
+
+ Optional `strict' argument when false adds a bunch of commonly found,
+ but non-standard types.
"""
type = type.lower()
for ext, stype in self.types_map.items():
if type == stype:
return ext
+ if not strict:
+ for ext, stype in common_types.items():
+ if type == stype:
+ return ext
return None
def read(self, filename):
@@ -149,7 +177,7 @@ class MimeTypes:
map['.' + suff] = type
-def guess_type(url):
+def guess_type(url, strict=1):
"""Guess the type of a file based on its URL.
Return value is a tuple (type, encoding) where type is None if the
@@ -163,12 +191,15 @@ def guess_type(url):
The suffixes .tgz, .taz and .tz (case sensitive!) are all mapped
to ".tar.gz". (This is table-driven too, using the dictionary
suffix_map).
+
+ Optional `strict' argument when false adds a bunch of commonly found, but
+ non-standard types.
"""
init()
- return guess_type(url)
+ return guess_type(url, strict)
-def guess_extension(type):
+def guess_extension(type, strict=1):
"""Guess the extension for a file based on its MIME type.
Return value is a string giving a filename extension, including the
@@ -176,14 +207,17 @@ def guess_extension(type):
associated with any particular data stream, but would be mapped to the
MIME type `type' by guess_type(). If no extension can be guessed for
`type', None is returned.
+
+ Optional `strict' argument when false adds a bunch of commonly found,
+ but non-standard types.
"""
init()
- return guess_extension(type)
+ return guess_extension(type, strict)
def init(files=None):
global guess_extension, guess_type
- global suffix_map, types_map, encodings_map
+ global suffix_map, types_map, encodings_map, common_types
global inited
inited = 1
db = MimeTypes()
@@ -197,6 +231,7 @@ def init(files=None):
types_map = db.types_map
guess_extension = db.guess_extension
guess_type = db.guess_type
+ common_types = db.common_types
def read_mime_types(file):
@@ -223,133 +258,178 @@ encodings_map = {
# Before adding new types, make sure they are either registered with IANA, at
# http://www.isi.edu/in-notes/iana/assignments/media-types
# or extensions, i.e. using the x- prefix
+
+# If you add to these, please keep them sorted!
types_map = {
- '.a': 'application/octet-stream',
- '.ai': 'application/postscript',
- '.aif': 'audio/x-aiff',
- '.aifc': 'audio/x-aiff',
- '.aiff': 'audio/x-aiff',
- '.au': 'audio/basic',
- '.avi': 'video/x-msvideo',
- '.bcpio': 'application/x-bcpio',
- '.bin': 'application/octet-stream',
- '.bmp': 'image/x-ms-bmp',
- '.cdf': 'application/x-netcdf',
- '.cpio': 'application/x-cpio',
- '.csh': 'application/x-csh',
- '.css': 'text/css',
- '.dll': 'application/octet-stream',
- '.doc': 'application/msword',
- '.dvi': 'application/x-dvi',
- '.exe': 'application/octet-stream',
- '.eps': 'application/postscript',
- '.etx': 'text/x-setext',
- '.gif': 'image/gif',
- '.gtar': 'application/x-gtar',
- '.hdf': 'application/x-hdf',
- '.htm': 'text/html',
- '.html': 'text/html',
- '.ief': 'image/ief',
- '.jpe': 'image/jpeg',
- '.jpeg': 'image/jpeg',
- '.jpg': 'image/jpeg',
- '.js': 'application/x-javascript',
- '.latex': 'application/x-latex',
- '.man': 'application/x-troff-man',
- '.me': 'application/x-troff-me',
- '.mif': 'application/x-mif',
- '.mov': 'video/quicktime',
- '.movie': 'video/x-sgi-movie',
- '.mp2': 'audio/mpeg',
- '.mp3': 'audio/mpeg',
- '.mpe': 'video/mpeg',
- '.mpeg': 'video/mpeg',
- '.mpg': 'video/mpeg',
- '.ms': 'application/x-troff-ms',
- '.nc': 'application/x-netcdf',
- '.o': 'application/octet-stream',
- '.obj': 'application/octet-stream',
- '.oda': 'application/oda',
- '.pbm': 'image/x-portable-bitmap',
- '.pdf': 'application/pdf',
- '.pgm': 'image/x-portable-graymap',
- '.pnm': 'image/x-portable-anymap',
- '.png': 'image/png',
- '.ppm': 'image/x-portable-pixmap',
- '.ps': 'application/postscript',
- '.py': 'text/x-python',
- '.pyc': 'application/x-python-code',
- '.pyo': 'application/x-python-code',
- '.qt': 'video/quicktime',
- '.ras': 'image/x-cmu-raster',
- '.rgb': 'image/x-rgb',
- '.rdf': 'application/xml',
- '.roff': 'application/x-troff',
- '.rtx': 'text/richtext',
- '.sgm': 'text/x-sgml',
- '.sgml': 'text/x-sgml',
- '.sh': 'application/x-sh',
- '.shar': 'application/x-shar',
- '.snd': 'audio/basic',
- '.so': 'application/octet-stream',
- '.src': 'application/x-wais-source',
+ '.a' : 'application/octet-stream',
+ '.ai' : 'application/postscript',
+ '.aif' : 'audio/x-aiff',
+ '.aifc' : 'audio/x-aiff',
+ '.aiff' : 'audio/x-aiff',
+ '.au' : 'audio/basic',
+ '.avi' : 'video/x-msvideo',
+ '.bat' : 'text/plain',
+ '.bcpio' : 'application/x-bcpio',
+ '.bin' : 'application/octet-stream',
+ '.bmp' : 'image/x-ms-bmp',
+ '.c' : 'text/plain',
+ # Duplicates :(
+ '.cdf' : 'application/x-cdf',
+ '.cdf' : 'application/x-netcdf',
+ '.cpio' : 'application/x-cpio',
+ '.csh' : 'application/x-csh',
+ '.css' : 'text/css',
+ '.dll' : 'application/octet-stream',
+ '.doc' : 'application/msword',
+ '.dot' : 'application/msword',
+ '.dvi' : 'application/x-dvi',
+ '.eml' : 'message/rfc822',
+ '.eps' : 'application/postscript',
+ '.etx' : 'text/x-setext',
+ '.exe' : 'application/octet-stream',
+ '.gif' : 'image/gif',
+ '.gtar' : 'application/x-gtar',
+ '.h' : 'text/plain',
+ '.hdf' : 'application/x-hdf',
+ '.htm' : 'text/html',
+ '.html' : 'text/html',
+ '.ief' : 'image/ief',
+ '.jpe' : 'image/jpeg',
+ '.jpeg' : 'image/jpeg',
+ '.jpg' : 'image/jpeg',
+ '.js' : 'application/x-javascript',
+ '.ksh' : 'text/plain',
+ '.latex' : 'application/x-latex',
+ '.m1v' : 'video/mpeg',
+ '.man' : 'application/x-troff-man',
+ '.me' : 'application/x-troff-me',
+ '.mht' : 'message/rfc822',
+ '.mhtml' : 'message/rfc822',
+ '.mif' : 'application/x-mif',
+ '.mov' : 'video/quicktime',
+ '.movie' : 'video/x-sgi-movie',
+ '.mp2' : 'audio/mpeg',
+ '.mp3' : 'audio/mpeg',
+ '.mpa' : 'video/mpeg',
+ '.mpe' : 'video/mpeg',
+ '.mpeg' : 'video/mpeg',
+ '.mpg' : 'video/mpeg',
+ '.ms' : 'application/x-troff-ms',
+ '.nc' : 'application/x-netcdf',
+ '.nws' : 'message/rfc822',
+ '.o' : 'application/octet-stream',
+ '.obj' : 'application/octet-stream',
+ '.oda' : 'application/oda',
+ '.p12' : 'application/x-pkcs12',
+ '.p7c' : 'application/pkcs7-mime',
+ '.pbm' : 'image/x-portable-bitmap',
+ '.pdf' : 'application/pdf',
+ '.pfx' : 'application/x-pkcs12',
+ '.pgm' : 'image/x-portable-graymap',
+ '.pl' : 'text/plain',
+ '.png' : 'image/png',
+ '.pnm' : 'image/x-portable-anymap',
+ '.pot' : 'application/vnd.ms-powerpoint',
+ '.ppa' : 'application/vnd.ms-powerpoint',
+ '.ppm' : 'image/x-portable-pixmap',
+ '.pps' : 'application/vnd.ms-powerpoint',
+ '.ppt' : 'application/vnd.ms-powerpoint',
+ '.ps' : 'application/postscript',
+ '.pwz' : 'application/vnd.ms-powerpoint',
+ '.py' : 'text/x-python',
+ '.pyc' : 'application/x-python-code',
+ '.pyo' : 'application/x-python-code',
+ '.qt' : 'video/quicktime',
+ '.ra' : 'audio/x-pn-realaudio',
+ '.ram' : 'application/x-pn-realaudio',
+ '.ras' : 'image/x-cmu-raster',
+ '.rdf' : 'application/xml',
+ '.rgb' : 'image/x-rgb',
+ '.roff' : 'application/x-troff',
+ '.rtx' : 'text/richtext',
+ '.sgm' : 'text/x-sgml',
+ '.sgml' : 'text/x-sgml',
+ '.sh' : 'application/x-sh',
+ '.shar' : 'application/x-shar',
+ '.snd' : 'audio/basic',
+ '.so' : 'application/octet-stream',
+ '.src' : 'application/x-wais-source',
'.sv4cpio': 'application/x-sv4cpio',
- '.sv4crc': 'application/x-sv4crc',
- '.t': 'application/x-troff',
- '.tar': 'application/x-tar',
- '.tcl': 'application/x-tcl',
- '.tex': 'application/x-tex',
- '.texi': 'application/x-texinfo',
+ '.sv4crc' : 'application/x-sv4crc',
+ '.t' : 'application/x-troff',
+ '.tar' : 'application/x-tar',
+ '.tcl' : 'application/x-tcl',
+ '.tex' : 'application/x-tex',
+ '.texi' : 'application/x-texinfo',
'.texinfo': 'application/x-texinfo',
- '.tif': 'image/tiff',
- '.tiff': 'image/tiff',
- '.tr': 'application/x-troff',
- '.tsv': 'text/tab-separated-values',
- '.txt': 'text/plain',
- '.ustar': 'application/x-ustar',
- '.wav': 'audio/x-wav',
- '.xbm': 'image/x-xbitmap',
- '.xls': 'application/excel',
- '.xml': 'text/xml',
- '.xsl': 'application/xml',
- '.xpm': 'image/x-xpixmap',
- '.xwd': 'image/x-xwindowdump',
- '.zip': 'application/zip',
- '.mp3': 'audio/mpeg',
- '.ra': 'audio/x-pn-realaudio',
- '.pdf': 'application/pdf',
- '.c': 'text/plain',
- '.bat': 'text/plain',
- '.h': 'text/plain',
- '.pl': 'text/plain',
- '.ksh': 'text/plain',
- '.ram': 'application/x-pn-realaudio',
- '.cdf': 'application/x-cdf',
- '.doc': 'application/msword',
- '.dot': 'application/msword',
- '.wiz': 'application/msword',
- '.xlb': 'application/vnd.ms-excel',
- '.xls': 'application/vnd.ms-excel',
- '.ppa': 'application/vnd.ms-powerpoint',
- '.ppt': 'application/vnd.ms-powerpoint',
- '.pps': 'application/vnd.ms-powerpoint',
- '.pot': 'application/vnd.ms-powerpoint',
- '.pwz': 'application/vnd.ms-powerpoint',
- '.eml': 'message/rfc822',
- '.nws': 'message/rfc822',
- '.mht': 'message/rfc822',
- '.mhtml': 'message/rfc822',
- '.css': 'text/css',
- '.p7c': 'application/pkcs7-mime',
- '.p12': 'application/x-pkcs12',
- '.pfx': 'application/x-pkcs12',
- '.js': 'application/x-javascript',
- '.m1v': 'video/mpeg',
- '.mpa': 'video/mpeg',
- '.vcf': 'text/x-vcard',
- '.xml': 'text/xml',
+ '.tif' : 'image/tiff',
+ '.tiff' : 'image/tiff',
+ '.tr' : 'application/x-troff',
+ '.tsv' : 'text/tab-separated-values',
+ '.txt' : 'text/plain',
+ '.ustar' : 'application/x-ustar',
+ '.vcf' : 'text/x-vcard',
+ '.wav' : 'audio/x-wav',
+ '.wiz' : 'application/msword',
+ '.xbm' : 'image/x-xbitmap',
+ '.xlb' : 'application/vnd.ms-excel',
+ # Duplicates :(
+ '.xls' : 'application/excel',
+ '.xls' : 'application/vnd.ms-excel',
+ '.xml' : 'text/xml',
+ '.xpm' : 'image/x-xpixmap',
+ '.xsl' : 'application/xml',
+ '.xwd' : 'image/x-xwindowdump',
+ '.zip' : 'application/zip',
}
+# These are non-standard types, commonly found in the wild. They will only
+# match if the strict=0 flag is given to the API methods.
+
+# Please sort these too
+common_types = {
+ '.jpg' : 'image/jpg',
+ '.mid' : 'audio/midi',
+ '.midi': 'audio/midi',
+ '.pct' : 'image/pict',
+ '.pic' : 'image/pict',
+ '.pict': 'image/pict',
+ '.rtf' : 'application/rtf',
+ '.xul' : 'text/xul'
+ }
+
+
+def usage(code, msg=''):
+ print __doc__
+ if msg: print msg
+ sys.exit(code)
+
+
if __name__ == '__main__':
import sys
- print guess_type(sys.argv[1])
+ import getopt
+
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], 'hle',
+ ['help', 'lenient', 'extension'])
+ except getopt.error, msg:
+ usage(1, msg)
+
+ strict = 1
+ extension = 0
+ for opt, arg in opts:
+ if opt in ('-h', '--help'):
+ usage(0)
+ elif opt in ('-l', '--lenient'):
+ strict = 0
+ elif opt in ('-e', '--extension'):
+ extension = 1
+ for gtype in args:
+ if extension:
+ guess = guess_extension(gtype, strict)
+ if not guess: print "I don't know anything about type", gtype
+ else: print guess
+ else:
+ guess, encoding = guess_type(gtype, strict)
+ if not guess: print "I don't know anything about type", gtype
+ else: print 'type:', guess, 'encoding:', encoding
diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py
index 8735e27..bca5766 100644
--- a/Lib/test/test_mimetypes.py
+++ b/Lib/test/test_mimetypes.py
@@ -38,6 +38,18 @@ class MimeTypesTestCase(unittest.TestCase):
self.assertEqual(self.db.guess_extension("x-application/x-unittest"),
".pyunit")
+ def test_non_standard_types(self):
+ # First try strict
+ self.assertEqual(self.db.guess_type('foo.xul', strict=1),
+ (None, None))
+ self.assertEqual(self.db.guess_extension('image/jpg', strict=1),
+ None)
+ # And then non-strict
+ self.assertEqual(self.db.guess_type('foo.xul', strict=0),
+ ('text/xul', None))
+ self.assertEqual(self.db.guess_extension('image/jpg', strict=0),
+ '.jpg')
+
def test_main():
test_support.run_unittest(MimeTypesTestCase)