diff options
author | Guido van Rossum <guido@python.org> | 2000-05-08 17:31:04 (GMT) |
---|---|---|
committer | Guido van Rossum <guido@python.org> | 2000-05-08 17:31:04 (GMT) |
commit | aad6761ccea28e0a0da6761570b18adc72e01c37 (patch) | |
tree | 731b55d5648f08e1bc755bcace1f836413cd8aae | |
parent | 0b095bc0929fb43157019c50e3e680a29ec94a65 (diff) | |
download | cpython-aad6761ccea28e0a0da6761570b18adc72e01c37.zip cpython-aad6761ccea28e0a0da6761570b18adc72e01c37.tar.gz cpython-aad6761ccea28e0a0da6761570b18adc72e01c37.tar.bz2 |
The usual...
56 files changed, 5051 insertions, 667 deletions
diff --git a/Lib/dos-8x3/basehttp.py b/Lib/dos-8x3/basehttp.py index 4c9645d..10a706e 100755 --- a/Lib/dos-8x3/basehttp.py +++ b/Lib/dos-8x3/basehttp.py @@ -68,7 +68,6 @@ import sys import time import socket # For gethostbyaddr() import string -import rfc822 import mimetools import SocketServer @@ -94,12 +93,16 @@ class HTTPServer(SocketServer.TCPServer): host, port = self.socket.getsockname() if not host or host == '0.0.0.0': host = socket.gethostname() - hostname, hostnames, hostaddrs = socket.gethostbyaddr(host) - if '.' not in hostname: - for host in hostnames: - if '.' in host: - hostname = host - break + try: + hostname, hostnames, hostaddrs = socket.gethostbyaddr(host) + except socket.error: + hostname = host + else: + if '.' not in hostname: + for host in hostnames: + if '.' in host: + hostname = host + break self.server_name = hostname self.server_port = port @@ -169,7 +172,7 @@ class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler): This server parses the request and the headers, and then calls a function specific to the request type (<command>). Specifically, - a request SPAM will be handled by a method handle_SPAM(). If no + a request SPAM will be handled by a method do_SPAM(). If no such method exists the server sends an error response to the client. If it exists, it is called with no arguments: @@ -216,16 +219,17 @@ class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler): # where each string is of the form name[/version]. server_version = "BaseHTTP/" + __version__ - def handle(self): - """Handle a single HTTP request. + def parse_request(self): + """Parse a request (internal). - You normally don't need to override this method; see the class - __doc__ string for information on how to handle specific HTTP - commands such as GET and POST. + The request should be stored in self.raw_request; the results + are in self.command, self.path, self.request_version and + self.headers. 
- """ + Return value is 1 for success, 0 for failure; on failure, an + error is sent back. - self.raw_requestline = self.rfile.readline() + """ self.request_version = version = "HTTP/0.9" # Default requestline = self.raw_requestline if requestline[-2:] == '\r\n': @@ -238,21 +242,35 @@ class BaseHTTPRequestHandler(SocketServer.StreamRequestHandler): [command, path, version] = words if version[:5] != 'HTTP/': self.send_error(400, "Bad request version (%s)" % `version`) - return + return 0 elif len(words) == 2: [command, path] = words if command != 'GET': self.send_error(400, "Bad HTTP/0.9 request type (%s)" % `command`) - return + return 0 else: self.send_error(400, "Bad request syntax (%s)" % `requestline`) - return + return 0 self.command, self.path, self.request_version = command, path, version self.headers = self.MessageClass(self.rfile, 0) - mname = 'do_' + command + return 1 + + def handle(self): + """Handle a single HTTP request. + + You normally don't need to override this method; see the class + __doc__ string for information on how to handle specific HTTP + commands such as GET and POST. + + """ + + self.raw_requestline = self.rfile.readline() + if not self.parse_request(): # An error code has been sent, just exit + return + mname = 'do_' + self.command if not hasattr(self, mname): - self.send_error(501, "Unsupported method (%s)" % `command`) + self.send_error(501, "Unsupported method (%s)" % `self.command`) return method = getattr(self, mname) method() diff --git a/Lib/dos-8x3/cgihttps.py b/Lib/dos-8x3/cgihttps.py index 806ef57..fa30cbd 100755 --- a/Lib/dos-8x3/cgihttps.py +++ b/Lib/dos-8x3/cgihttps.py @@ -3,6 +3,9 @@ This module builds on SimpleHTTPServer by implementing GET and POST requests to cgi-bin scripts. +If the os.fork() function is not present, this module will not work; +SystemError will be raised instead. 
+ """ @@ -10,15 +13,18 @@ __version__ = "0.3" import os -import sys -import time -import socket import string import urllib import BaseHTTPServer import SimpleHTTPServer +try: + os.fork +except AttributeError: + raise SystemError, __name__ + " requires os.fork()" + + class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler): """Complete HTTP server with GET, HEAD and POST commands. @@ -150,6 +156,9 @@ class CGIHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler): ua = self.headers.getheader('user-agent') if ua: env['HTTP_USER_AGENT'] = ua + co = filter(None, self.headers.getheaders('cookie')) + if co: + env['HTTP_COOKIE'] = string.join(co, ', ') # XXX Other HTTP_* headers decoded_query = string.replace(query, '+', ' ') try: @@ -177,7 +186,7 @@ def nobody_uid(): import pwd try: nobody = pwd.getpwnam('nobody')[2] - except pwd.error: + except KeyError: nobody = 1 + max(map(lambda x: x[2], pwd.getpwall())) return nobody diff --git a/Lib/dos-8x3/configpa.py b/Lib/dos-8x3/configpa.py index dd8b6d8..e1ce9dd 100644 --- a/Lib/dos-8x3/configpa.py +++ b/Lib/dos-8x3/configpa.py @@ -33,11 +33,24 @@ ConfigParser -- responsible for for parsing a list of sections() return all the configuration section names, sans DEFAULT + has_section(section) + return whether the given section exists + options(section) return list of configuration options for the named section + has_option(section, option) + return whether the given section has the given option + read(filenames) - read and parse the list of named configuration files + read and parse the list of named configuration files, given by + name. A single filename is also allowed. Non-existing files + are ignored. + + readfp(fp, filename=None) + read and parse one configuration file, given as a file object. + The filename defaults to fp.name; it is only used in error + messages (if fp has no `name' attribute, the string `<???>' is used). 
get(section, option, raw=0, vars=None) return a string value for the named option. All % interpolations are @@ -158,6 +171,7 @@ class ConfigParser: return self.__sections.has_key(section) def options(self, section): + """Return a list of option names for the given section name.""" try: opts = self.__sections[section].copy() except KeyError: @@ -165,16 +179,49 @@ class ConfigParser: opts.update(self.__defaults) return opts.keys() + def has_option(self, section, option): + """Return whether the given section has the given option.""" + try: + opts = self.__sections[section] + except KeyError: + raise NoSectionError(section) + return opts.has_key(option) + def read(self, filenames): - """Read and parse a list of filenames.""" + """Read and parse a filename or a list of filenames. + + Files that cannot be opened are silently ignored; this is + designed so that you can specify a list of potential + configuration file locations (e.g. current directory, user's + home directory, systemwide directory), and all existing + configuration files in the list will be read. A single + filename may also be given. + """ if type(filenames) is type(''): filenames = [filenames] - for file in filenames: + for filename in filenames: try: - fp = open(file, 'r') - self.__read(fp) + fp = open(filename) except IOError: - pass + continue + self.__read(fp, filename) + fp.close() + + def readfp(self, fp, filename=None): + """Like read() but the argument must be a file-like object. + + The `fp' argument must have a `readline' method. Optional + second argument is the `filename', which if not given, is + taken from fp.name. If fp has no `name' attribute, `<???>' is + used. + + """ + if filename is None: + try: + filename = fp.name + except AttributeError: + filename = '<???>' + self.__read(fp, filename) def get(self, section, option, raw=0, vars=None): """Get an option value for a given section. 
@@ -199,7 +246,7 @@ class ConfigParser: # Update with the entry specific variables if vars: d.update(vars) - option = string.lower(option) + option = self.optionxform(option) try: rawval = d[option] except KeyError: @@ -212,7 +259,7 @@ class ConfigParser: depth = 0 while depth < 10: # Loop through this until it's done depth = depth + 1 - if not string.find(value, "%("): + if string.find(value, "%(") >= 0: try: value = value % d except KeyError, key: @@ -236,25 +283,28 @@ class ConfigParser: raise ValueError, 'Not a boolean: %s' % v return val + def optionxform(self, optionstr): + return string.lower(optionstr) + # # Regular expressions for parsing section headers and options. Note a # slight semantic change from the previous version, because of the use # of \w, _ is allowed in section header names. - __SECTCRE = re.compile( + SECTCRE = re.compile( r'\[' # [ - r'(?P<header>[-\w]+)' # `-', `_' or any alphanum + r'(?P<header>[-\w_.*,(){}]+)' # a lot of stuff found by IvL r'\]' # ] ) - __OPTCRE = re.compile( - r'(?P<option>[-.\w]+)' # - . _ alphanum - r'[ \t]*[:=][ \t]*' # any number of space/tab, + OPTCRE = re.compile( + r'(?P<option>[-\w_.*,(){}]+)' # a lot of stuff found by IvL + r'[ \t]*(?P<vi>[:=])[ \t]*' # any number of space/tab, # followed by separator # (either : or =), followed # by any # space/tab r'(?P<value>.*)$' # everything up to eol ) - def __read(self, fp): + def __read(self, fp, fpname): """Parse a sectioned setup file. The sections in setup file contains a title line at the top, @@ -277,7 +327,7 @@ class ConfigParser: if string.strip(line) == '' or line[0] in '#;': continue if string.lower(string.split(line)[0]) == 'rem' \ - and line[0] == "r": # no leading whitespace + and line[0] in "rR": # no leading whitespace continue # continuation line? if line[0] in ' \t' and cursect is not None and optname: @@ -287,7 +337,7 @@ class ConfigParser: # a section header or option header? else: # is it a section header? 
- mo = self.__SECTCRE.match(line) + mo = self.SECTCRE.match(line) if mo: sectname = mo.group('header') if self.__sections.has_key(sectname): @@ -301,13 +351,19 @@ class ConfigParser: optname = None # no section header in the file? elif cursect is None: - raise MissingSectionHeaderError(fp.name, lineno, `line`) + raise MissingSectionHeaderError(fpname, lineno, `line`) # an option line? else: - mo = self.__OPTCRE.match(line) + mo = self.OPTCRE.match(line) if mo: - optname, optval = mo.group('option', 'value') + optname, vi, optval = mo.group('option', 'vi', 'value') optname = string.lower(optname) + if vi in ('=', ':') and ';' in optval: + # ';' is a comment delimiter only if it follows + # a spacing character + pos = string.find(optval, ';') + if pos and optval[pos-1] in string.whitespace: + optval = optval[:pos] optval = string.strip(optval) # allow empty values if optval == '""': @@ -319,7 +375,7 @@ class ConfigParser: # raised at the end of the file and will contain a # list of all bogus lines if not e: - e = ParsingError(fp.name) + e = ParsingError(fpname) e.append(lineno, `line`) # if any parsing errors occurred, raise an exception if e: diff --git a/Lib/dos-8x3/exceptio.py b/Lib/dos-8x3/exceptio.py index e943f7b..43d1c2d 100644 --- a/Lib/dos-8x3/exceptio.py +++ b/Lib/dos-8x3/exceptio.py @@ -33,6 +33,8 @@ Exception(*) | | | +-- IOError | +-- OSError(*) + | | + | +-- WindowsError(*) | +-- EOFError +-- RuntimeError @@ -40,6 +42,9 @@ Exception(*) | +-- NotImplementedError(*) | +-- NameError + | | + | +-- UnboundLocalError(*) + | +-- AttributeError +-- SyntaxError +-- TypeError @@ -56,6 +61,9 @@ Exception(*) | +-- FloatingPointError | +-- ValueError + | | + | +-- UnicodeError(*) + | +-- SystemError +-- MemoryError """ @@ -136,6 +144,10 @@ class OSError(EnvironmentError): """OS system call failed.""" pass +class WindowsError(OSError): + """MS-Windows OS system call failed.""" + pass + class RuntimeError(StandardError): """Unspecified run-time error.""" pass @@ 
-208,7 +220,15 @@ class AttributeError(StandardError): pass class NameError(StandardError): - """Name not found locally or globally.""" + """Name not found globally.""" + pass + +class UnboundLocalError(NameError): + """Local name referenced but not bound to a value.""" + pass + +class UnicodeError(ValueError): + """Unicode related error.""" pass class MemoryError(StandardError): diff --git a/Lib/dos-8x3/fileinpu.py b/Lib/dos-8x3/fileinpu.py index 2e26b5b..d1b7617 100644 --- a/Lib/dos-8x3/fileinpu.py +++ b/Lib/dos-8x3/fileinpu.py @@ -73,11 +73,11 @@ XXX Possible additions: """ -import sys, os +import sys, os, stat _state = None -def input(files=(), inplace=0, backup=""): +def input(files=None, inplace=0, backup=""): global _state if _state and _state._file: raise RuntimeError, "input() already active" @@ -123,15 +123,16 @@ def isstdin(): class FileInput: - def __init__(self, files=(), inplace=0, backup=""): + def __init__(self, files=None, inplace=0, backup=""): if type(files) == type(''): files = (files,) else: - files = tuple(files) + if files is None: + files = sys.argv[1:] if not files: - files = tuple(sys.argv[1:]) - if not files: - files = ('-',) + files = ('-',) + else: + files = tuple(files) self._files = files self._inplace = inplace self._backup = backup @@ -203,10 +204,22 @@ class FileInput: self._filename + (self._backup or ".bak")) try: os.unlink(self._backupfilename) except os.error: pass - # The next three lines may raise IOError + # The next few lines may raise IOError os.rename(self._filename, self._backupfilename) self._file = open(self._backupfilename, "r") - self._output = open(self._filename, "w") + try: + perm = os.fstat(self._file.fileno())[stat.ST_MODE] + except: + self._output = open(self._filename, "w") + else: + fd = os.open(self._filename, + os.O_CREAT | os.O_WRONLY | os.O_TRUNC, + perm) + self._output = os.fdopen(fd, "w") + try: + os.chmod(self._filename, perm) + except: + pass self._savestdout = sys.stdout sys.stdout = self._output 
else: diff --git a/Lib/dos-8x3/formatte.py b/Lib/dos-8x3/formatte.py index 4b340d5..4d6a129 100755 --- a/Lib/dos-8x3/formatte.py +++ b/Lib/dos-8x3/formatte.py @@ -1,3 +1,23 @@ +"""Generic output formatting. + +Formatter objects transform an abstract flow of formatting events into +specific output events on writer objects. Formatters manage several stack +structures to allow various properties of a writer object to be changed and +restored; writers need not be able to handle relative changes nor any sort +of ``change back'' operation. Specific writer properties which may be +controlled via formatter objects are horizontal alignment, font, and left +margin indentations. A mechanism is provided which supports providing +arbitrary, non-exclusive style settings to a writer as well. Additional +interfaces facilitate formatting events which are not reversible, such as +paragraph separation. + +Writer objects encapsulate device interfaces. Abstract devices, such as +file formats, are supported as well as physical devices. The provided +implementations all work with abstract devices. The interface makes +available mechanisms for setting the properties which formatter objects +manage and inserting data into the output. 
+""" + import string import sys from types import StringType diff --git a/Lib/dos-8x3/gopherli.py b/Lib/dos-8x3/gopherli.py index 033e579..6965fbd 100755 --- a/Lib/dos-8x3/gopherli.py +++ b/Lib/dos-8x3/gopherli.py @@ -1,4 +1,4 @@ -# Gopher protocol client interface +"""Gopher protocol client interface.""" import string @@ -29,180 +29,180 @@ A_IMAGE = 'I' A_WHOIS = 'w' A_QUERY = 'q' A_GIF = 'g' -A_HTML = 'h' # HTML file -A_WWW = 'w' # WWW address +A_HTML = 'h' # HTML file +A_WWW = 'w' # WWW address A_PLUS_IMAGE = ':' A_PLUS_MOVIE = ';' A_PLUS_SOUND = '<' -# Function mapping all file types to strings; unknown types become TYPE='x' _names = dir() _type_to_name_map = {} def type_to_name(gtype): - global _type_to_name_map - if _type_to_name_map=={}: - for name in _names: - if name[:2] == 'A_': - _type_to_name_map[eval(name)] = name[2:] - if _type_to_name_map.has_key(gtype): - return _type_to_name_map[gtype] - return 'TYPE=' + `gtype` + """Map all file types to strings; unknown types become TYPE='x'.""" + global _type_to_name_map + if _type_to_name_map=={}: + for name in _names: + if name[:2] == 'A_': + _type_to_name_map[eval(name)] = name[2:] + if _type_to_name_map.has_key(gtype): + return _type_to_name_map[gtype] + return 'TYPE=' + `gtype` # Names for characters and strings CRLF = '\r\n' TAB = '\t' -# Send a selector to a given host and port, return a file with the reply def send_selector(selector, host, port = 0): - import socket - import string - if not port: - i = string.find(host, ':') - if i >= 0: - host, port = host[:i], string.atoi(host[i+1:]) - if not port: - port = DEF_PORT - elif type(port) == type(''): - port = string.atoi(port) - s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - s.connect(host, port) - s.send(selector + CRLF) - s.shutdown(1) - return s.makefile('rb') - -# Send a selector and a query string + """Send a selector to a given host and port, return a file with the reply.""" + import socket + import string + if not port: + i = 
string.find(host, ':') + if i >= 0: + host, port = host[:i], string.atoi(host[i+1:]) + if not port: + port = DEF_PORT + elif type(port) == type(''): + port = string.atoi(port) + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.connect((host, port)) + s.send(selector + CRLF) + s.shutdown(1) + return s.makefile('rb') + def send_query(selector, query, host, port = 0): - return send_selector(selector + '\t' + query, host, port) + """Send a selector and a query string.""" + return send_selector(selector + '\t' + query, host, port) -# Takes a path as returned by urlparse and returns the appropriate selector def path_to_selector(path): - if path=="/": - return "/" - else: - return path[2:] # Cuts initial slash and data type identifier + """Takes a path as returned by urlparse and returns the appropriate selector.""" + if path=="/": + return "/" + else: + return path[2:] # Cuts initial slash and data type identifier -# Takes a path as returned by urlparse and maps it to a string -# See section 3.4 of RFC 1738 for details def path_to_datatype_name(path): - if path=="/": - # No way to tell, although "INDEX" is likely - return "TYPE='unknown'" - else: - return type_to_name(path[1]) + """Takes a path as returned by urlparse and maps it to a string. + See section 3.4 of RFC 1738 for details.""" + if path=="/": + # No way to tell, although "INDEX" is likely + return "TYPE='unknown'" + else: + return type_to_name(path[1]) # The following functions interpret the data returned by the gopher # server according to the expected type, e.g. 
textfile or directory -# Get a directory in the form of a list of entries def get_directory(f): - import string - list = [] - while 1: - line = f.readline() - if not line: - print '(Unexpected EOF from server)' - break - if line[-2:] == CRLF: - line = line[:-2] - elif line[-1:] in CRLF: - line = line[:-1] - if line == '.': - break - if not line: - print '(Empty line from server)' - continue - gtype = line[0] - parts = string.splitfields(line[1:], TAB) - if len(parts) < 4: - print '(Bad line from server:', `line`, ')' - continue - if len(parts) > 4: - if parts[4:] != ['+']: - print '(Extra info from server:', - print parts[4:], ')' - else: - parts.append('') - parts.insert(0, gtype) - list.append(parts) - return list - -# Get a text file as a list of lines, with trailing CRLF stripped + """Get a directory in the form of a list of entries.""" + import string + list = [] + while 1: + line = f.readline() + if not line: + print '(Unexpected EOF from server)' + break + if line[-2:] == CRLF: + line = line[:-2] + elif line[-1:] in CRLF: + line = line[:-1] + if line == '.': + break + if not line: + print '(Empty line from server)' + continue + gtype = line[0] + parts = string.splitfields(line[1:], TAB) + if len(parts) < 4: + print '(Bad line from server:', `line`, ')' + continue + if len(parts) > 4: + if parts[4:] != ['+']: + print '(Extra info from server:', + print parts[4:], ')' + else: + parts.append('') + parts.insert(0, gtype) + list.append(parts) + return list + def get_textfile(f): - list = [] - get_alt_textfile(f, list.append) - return list + """Get a text file as a list of lines, with trailing CRLF stripped.""" + list = [] + get_alt_textfile(f, list.append) + return list -# Get a text file and pass each line to a function, with trailing CRLF stripped def get_alt_textfile(f, func): - while 1: - line = f.readline() - if not line: - print '(Unexpected EOF from server)' - break - if line[-2:] == CRLF: - line = line[:-2] - elif line[-1:] in CRLF: - line = line[:-1] - 
if line == '.': - break - if line[:2] == '..': - line = line[1:] - func(line) - -# Get a binary file as one solid data block + """Get a text file and pass each line to a function, with trailing CRLF stripped.""" + while 1: + line = f.readline() + if not line: + print '(Unexpected EOF from server)' + break + if line[-2:] == CRLF: + line = line[:-2] + elif line[-1:] in CRLF: + line = line[:-1] + if line == '.': + break + if line[:2] == '..': + line = line[1:] + func(line) + def get_binary(f): - data = f.read() - return data + """Get a binary file as one solid data block.""" + data = f.read() + return data -# Get a binary file and pass each block to a function def get_alt_binary(f, func, blocksize): - while 1: - data = f.read(blocksize) - if not data: - break - func(data) + """Get a binary file and pass each block to a function.""" + while 1: + data = f.read(blocksize) + if not data: + break + func(data) -# Trivial test program def test(): - import sys - import getopt - opts, args = getopt.getopt(sys.argv[1:], '') - selector = DEF_SELECTOR - type = selector[0] - host = DEF_HOST - port = DEF_PORT - if args: - host = args[0] - args = args[1:] - if args: - type = args[0] - args = args[1:] - if len(type) > 1: - type, selector = type[0], type - else: - selector = '' - if args: - selector = args[0] - args = args[1:] - query = '' - if args: - query = args[0] - args = args[1:] - if type == A_INDEX: - f = send_query(selector, query, host) - else: - f = send_selector(selector, host) - if type == A_TEXT: - list = get_textfile(f) - for item in list: print item - elif type in (A_MENU, A_INDEX): - list = get_directory(f) - for item in list: print item - else: - data = get_binary(f) - print 'binary data:', len(data), 'bytes:', `data[:100]`[:40] + """Trivial test program.""" + import sys + import getopt + opts, args = getopt.getopt(sys.argv[1:], '') + selector = DEF_SELECTOR + type = selector[0] + host = DEF_HOST + port = DEF_PORT + if args: + host = args[0] + args = args[1:] + if 
args: + type = args[0] + args = args[1:] + if len(type) > 1: + type, selector = type[0], type + else: + selector = '' + if args: + selector = args[0] + args = args[1:] + query = '' + if args: + query = args[0] + args = args[1:] + if type == A_INDEX: + f = send_query(selector, query, host) + else: + f = send_selector(selector, host) + if type == A_TEXT: + list = get_textfile(f) + for item in list: print item + elif type in (A_MENU, A_INDEX): + list = get_directory(f) + for item in list: print item + else: + data = get_binary(f) + print 'binary data:', len(data), 'bytes:', `data[:100]`[:40] # Run the test when run as script if __name__ == '__main__': - test() + test() diff --git a/Lib/dos-8x3/htmlenti.py b/Lib/dos-8x3/htmlenti.py index 55aefaa..6682bf2 100755 --- a/Lib/dos-8x3/htmlenti.py +++ b/Lib/dos-8x3/htmlenti.py @@ -1,105 +1,257 @@ -# Proposed entity definitions for HTML, taken from -# http://www.w3.org/hypertext/WWW/MarkUp/html-spec/html-spec_14.html +"""HTML character entity references.""" entitydefs = { - 'lt': '<', - 'gt': '>', - 'amp': '&', - 'quot': '"', - 'nbsp': chr(160), # no-break space - 'iexcl': chr(161), # inverted exclamation mark - 'cent': chr(162), # cent sign - 'pound': chr(163), # pound sterling sign - 'curren': chr(164), # general currency sign - 'yen': chr(165), # yen sign - 'brvbar': chr(166), # broken (vertical) bar - 'sect': chr(167), # section sign - 'uml': chr(168), # umlaut (dieresis) - 'copy': chr(169), # copyright sign - 'ordf': chr(170), # ordinal indicator, feminine - 'laquo': chr(171), # angle quotation mark, left - 'not': chr(172), # not sign - 'shy': chr(173), # soft hyphen - 'reg': chr(174), # registered sign - 'macr': chr(175), # macron - 'deg': chr(176), # degree sign - 'plusmn': chr(177), # plus-or-minus sign - 'sup2': chr(178), # superscript two - 'sup3': chr(179), # superscript three - 'acute': chr(180), # acute accent - 'micro': chr(181), # micro sign - 'para': chr(182), # pilcrow (paragraph sign) - 'middot': chr(183), # 
middle dot - 'cedil': chr(184), # cedilla - 'sup1': chr(185), # superscript one - 'ordm': chr(186), # ordinal indicator, masculine - 'raquo': chr(187), # angle quotation mark, right - 'frac14': chr(188), # fraction one-quarter - 'frac12': chr(189), # fraction one-half - 'frac34': chr(190), # fraction three-quarters - 'iquest': chr(191), # inverted question mark - 'Agrave': chr(192), # capital A, grave accent - 'Aacute': chr(193), # capital A, acute accent - 'Acirc': chr(194), # capital A, circumflex accent - 'Atilde': chr(195), # capital A, tilde - 'Auml': chr(196), # capital A, dieresis or umlaut mark - 'Aring': chr(197), # capital A, ring - 'AElig': chr(198), # capital AE diphthong (ligature) - 'Ccedil': chr(199), # capital C, cedilla - 'Egrave': chr(200), # capital E, grave accent - 'Eacute': chr(201), # capital E, acute accent - 'Ecirc': chr(202), # capital E, circumflex accent - 'Euml': chr(203), # capital E, dieresis or umlaut mark - 'Igrave': chr(204), # capital I, grave accent - 'Iacute': chr(205), # capital I, acute accent - 'Icirc': chr(206), # capital I, circumflex accent - 'Iuml': chr(207), # capital I, dieresis or umlaut mark - 'ETH': chr(208), # capital Eth, Icelandic - 'Ntilde': chr(209), # capital N, tilde - 'Ograve': chr(210), # capital O, grave accent - 'Oacute': chr(211), # capital O, acute accent - 'Ocirc': chr(212), # capital O, circumflex accent - 'Otilde': chr(213), # capital O, tilde - 'Ouml': chr(214), # capital O, dieresis or umlaut mark - 'times': chr(215), # multiply sign - 'Oslash': chr(216), # capital O, slash - 'Ugrave': chr(217), # capital U, grave accent - 'Uacute': chr(218), # capital U, acute accent - 'Ucirc': chr(219), # capital U, circumflex accent - 'Uuml': chr(220), # capital U, dieresis or umlaut mark - 'Yacute': chr(221), # capital Y, acute accent - 'THORN': chr(222), # capital THORN, Icelandic - 'szlig': chr(223), # small sharp s, German (sz ligature) - 'agrave': chr(224), # small a, grave accent - 'aacute': chr(225), # 
small a, acute accent - 'acirc': chr(226), # small a, circumflex accent - 'atilde': chr(227), # small a, tilde - 'auml': chr(228), # small a, dieresis or umlaut mark - 'aring': chr(229), # small a, ring - 'aelig': chr(230), # small ae diphthong (ligature) - 'ccedil': chr(231), # small c, cedilla - 'egrave': chr(232), # small e, grave accent - 'eacute': chr(233), # small e, acute accent - 'ecirc': chr(234), # small e, circumflex accent - 'euml': chr(235), # small e, dieresis or umlaut mark - 'igrave': chr(236), # small i, grave accent - 'iacute': chr(237), # small i, acute accent - 'icirc': chr(238), # small i, circumflex accent - 'iuml': chr(239), # small i, dieresis or umlaut mark - 'eth': chr(240), # small eth, Icelandic - 'ntilde': chr(241), # small n, tilde - 'ograve': chr(242), # small o, grave accent - 'oacute': chr(243), # small o, acute accent - 'ocirc': chr(244), # small o, circumflex accent - 'otilde': chr(245), # small o, tilde - 'ouml': chr(246), # small o, dieresis or umlaut mark - 'divide': chr(247), # divide sign - 'oslash': chr(248), # small o, slash - 'ugrave': chr(249), # small u, grave accent - 'uacute': chr(250), # small u, acute accent - 'ucirc': chr(251), # small u, circumflex accent - 'uuml': chr(252), # small u, dieresis or umlaut mark - 'yacute': chr(253), # small y, acute accent - 'thorn': chr(254), # small thorn, Icelandic - 'yuml': chr(255), # small y, dieresis or umlaut mark + 'AElig': '\306', # latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1 + 'Aacute': '\301', # latin capital letter A with acute, U+00C1 ISOlat1 + 'Acirc': '\302', # latin capital letter A with circumflex, U+00C2 ISOlat1 + 'Agrave': '\300', # latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1 + 'Alpha': 'Α', # greek capital letter alpha, U+0391 + 'Aring': '\305', # latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1 + 'Atilde': '\303', # latin capital letter A with tilde, U+00C3 ISOlat1 + 
'Auml': '\304', # latin capital letter A with diaeresis, U+00C4 ISOlat1 + 'Beta': 'Β', # greek capital letter beta, U+0392 + 'Ccedil': '\307', # latin capital letter C with cedilla, U+00C7 ISOlat1 + 'Chi': 'Χ', # greek capital letter chi, U+03A7 + 'Dagger': '‡', # double dagger, U+2021 ISOpub + 'Delta': 'Δ', # greek capital letter delta, U+0394 ISOgrk3 + 'ETH': '\320', # latin capital letter ETH, U+00D0 ISOlat1 + 'Eacute': '\311', # latin capital letter E with acute, U+00C9 ISOlat1 + 'Ecirc': '\312', # latin capital letter E with circumflex, U+00CA ISOlat1 + 'Egrave': '\310', # latin capital letter E with grave, U+00C8 ISOlat1 + 'Epsilon': 'Ε', # greek capital letter epsilon, U+0395 + 'Eta': 'Η', # greek capital letter eta, U+0397 + 'Euml': '\313', # latin capital letter E with diaeresis, U+00CB ISOlat1 + 'Gamma': 'Γ', # greek capital letter gamma, U+0393 ISOgrk3 + 'Iacute': '\315', # latin capital letter I with acute, U+00CD ISOlat1 + 'Icirc': '\316', # latin capital letter I with circumflex, U+00CE ISOlat1 + 'Igrave': '\314', # latin capital letter I with grave, U+00CC ISOlat1 + 'Iota': 'Ι', # greek capital letter iota, U+0399 + 'Iuml': '\317', # latin capital letter I with diaeresis, U+00CF ISOlat1 + 'Kappa': 'Κ', # greek capital letter kappa, U+039A + 'Lambda': 'Λ', # greek capital letter lambda, U+039B ISOgrk3 + 'Mu': 'Μ', # greek capital letter mu, U+039C + 'Ntilde': '\321', # latin capital letter N with tilde, U+00D1 ISOlat1 + 'Nu': 'Ν', # greek capital letter nu, U+039D + 'OElig': 'Œ', # latin capital ligature OE, U+0152 ISOlat2 + 'Oacute': '\323', # latin capital letter O with acute, U+00D3 ISOlat1 + 'Ocirc': '\324', # latin capital letter O with circumflex, U+00D4 ISOlat1 + 'Ograve': '\322', # latin capital letter O with grave, U+00D2 ISOlat1 + 'Omega': 'Ω', # greek capital letter omega, U+03A9 ISOgrk3 + 'Omicron': 'Ο', # greek capital letter omicron, U+039F + 'Oslash': '\330', # latin capital letter O with stroke = latin capital letter O slash, U+00D8 
ISOlat1 + 'Otilde': '\325', # latin capital letter O with tilde, U+00D5 ISOlat1 + 'Ouml': '\326', # latin capital letter O with diaeresis, U+00D6 ISOlat1 + 'Phi': 'Φ', # greek capital letter phi, U+03A6 ISOgrk3 + 'Pi': 'Π', # greek capital letter pi, U+03A0 ISOgrk3 + 'Prime': '″', # double prime = seconds = inches, U+2033 ISOtech + 'Psi': 'Ψ', # greek capital letter psi, U+03A8 ISOgrk3 + 'Rho': 'Ρ', # greek capital letter rho, U+03A1 + 'Scaron': 'Š', # latin capital letter S with caron, U+0160 ISOlat2 + 'Sigma': 'Σ', # greek capital letter sigma, U+03A3 ISOgrk3 + 'THORN': '\336', # latin capital letter THORN, U+00DE ISOlat1 + 'Tau': 'Τ', # greek capital letter tau, U+03A4 + 'Theta': 'Θ', # greek capital letter theta, U+0398 ISOgrk3 + 'Uacute': '\332', # latin capital letter U with acute, U+00DA ISOlat1 + 'Ucirc': '\333', # latin capital letter U with circumflex, U+00DB ISOlat1 + 'Ugrave': '\331', # latin capital letter U with grave, U+00D9 ISOlat1 + 'Upsilon': 'Υ', # greek capital letter upsilon, U+03A5 ISOgrk3 + 'Uuml': '\334', # latin capital letter U with diaeresis, U+00DC ISOlat1 + 'Xi': 'Ξ', # greek capital letter xi, U+039E ISOgrk3 + 'Yacute': '\335', # latin capital letter Y with acute, U+00DD ISOlat1 + 'Yuml': 'Ÿ', # latin capital letter Y with diaeresis, U+0178 ISOlat2 + 'Zeta': 'Ζ', # greek capital letter zeta, U+0396 + 'aacute': '\341', # latin small letter a with acute, U+00E1 ISOlat1 + 'acirc': '\342', # latin small letter a with circumflex, U+00E2 ISOlat1 + 'acute': '\264', # acute accent = spacing acute, U+00B4 ISOdia + 'aelig': '\346', # latin small letter ae = latin small ligature ae, U+00E6 ISOlat1 + 'agrave': '\340', # latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1 + 'alefsym': 'ℵ', # alef symbol = first transfinite cardinal, U+2135 NEW + 'alpha': 'α', # greek small letter alpha, U+03B1 ISOgrk3 + 'amp': '\46', # ampersand, U+0026 ISOnum + 'and': '∧', # logical and = wedge, U+2227 ISOtech + 'ang': '∠', # angle, U+2220 
ISOamso + 'aring': '\345', # latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1 + 'asymp': '≈', # almost equal to = asymptotic to, U+2248 ISOamsr + 'atilde': '\343', # latin small letter a with tilde, U+00E3 ISOlat1 + 'auml': '\344', # latin small letter a with diaeresis, U+00E4 ISOlat1 + 'bdquo': '„', # double low-9 quotation mark, U+201E NEW + 'beta': 'β', # greek small letter beta, U+03B2 ISOgrk3 + 'brvbar': '\246', # broken bar = broken vertical bar, U+00A6 ISOnum + 'bull': '•', # bullet = black small circle, U+2022 ISOpub + 'cap': '∩', # intersection = cap, U+2229 ISOtech + 'ccedil': '\347', # latin small letter c with cedilla, U+00E7 ISOlat1 + 'cedil': '\270', # cedilla = spacing cedilla, U+00B8 ISOdia + 'cent': '\242', # cent sign, U+00A2 ISOnum + 'chi': 'χ', # greek small letter chi, U+03C7 ISOgrk3 + 'circ': 'ˆ', # modifier letter circumflex accent, U+02C6 ISOpub + 'clubs': '♣', # black club suit = shamrock, U+2663 ISOpub + 'cong': '≅', # approximately equal to, U+2245 ISOtech + 'copy': '\251', # copyright sign, U+00A9 ISOnum + 'crarr': '↵', # downwards arrow with corner leftwards = carriage return, U+21B5 NEW + 'cup': '∪', # union = cup, U+222A ISOtech + 'curren': '\244', # currency sign, U+00A4 ISOnum + 'dArr': '⇓', # downwards double arrow, U+21D3 ISOamsa + 'dagger': '†', # dagger, U+2020 ISOpub + 'darr': '↓', # downwards arrow, U+2193 ISOnum + 'deg': '\260', # degree sign, U+00B0 ISOnum + 'delta': 'δ', # greek small letter delta, U+03B4 ISOgrk3 + 'diams': '♦', # black diamond suit, U+2666 ISOpub + 'divide': '\367', # division sign, U+00F7 ISOnum + 'eacute': '\351', # latin small letter e with acute, U+00E9 ISOlat1 + 'ecirc': '\352', # latin small letter e with circumflex, U+00EA ISOlat1 + 'egrave': '\350', # latin small letter e with grave, U+00E8 ISOlat1 + 'empty': '∅', # empty set = null set = diameter, U+2205 ISOamso + 'emsp': ' ', # em space, U+2003 ISOpub + 'ensp': ' ', # en space, U+2002 ISOpub + 'epsilon': 'ε', # 
greek small letter epsilon, U+03B5 ISOgrk3 + 'equiv': '≡', # identical to, U+2261 ISOtech + 'eta': 'η', # greek small letter eta, U+03B7 ISOgrk3 + 'eth': '\360', # latin small letter eth, U+00F0 ISOlat1 + 'euml': '\353', # latin small letter e with diaeresis, U+00EB ISOlat1 + 'euro': '€', # euro sign, U+20AC NEW + 'exist': '∃', # there exists, U+2203 ISOtech + 'fnof': 'ƒ', # latin small f with hook = function = florin, U+0192 ISOtech + 'forall': '∀', # for all, U+2200 ISOtech + 'frac12': '\275', # vulgar fraction one half = fraction one half, U+00BD ISOnum + 'frac14': '\274', # vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum + 'frac34': '\276', # vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum + 'frasl': '⁄', # fraction slash, U+2044 NEW + 'gamma': 'γ', # greek small letter gamma, U+03B3 ISOgrk3 + 'ge': '≥', # greater-than or equal to, U+2265 ISOtech + 'gt': '\76', # greater-than sign, U+003E ISOnum + 'hArr': '⇔', # left right double arrow, U+21D4 ISOamsa + 'harr': '↔', # left right arrow, U+2194 ISOamsa + 'hearts': '♥', # black heart suit = valentine, U+2665 ISOpub + 'hellip': '…', # horizontal ellipsis = three dot leader, U+2026 ISOpub + 'iacute': '\355', # latin small letter i with acute, U+00ED ISOlat1 + 'icirc': '\356', # latin small letter i with circumflex, U+00EE ISOlat1 + 'iexcl': '\241', # inverted exclamation mark, U+00A1 ISOnum + 'igrave': '\354', # latin small letter i with grave, U+00EC ISOlat1 + 'image': 'ℑ', # blackletter capital I = imaginary part, U+2111 ISOamso + 'infin': '∞', # infinity, U+221E ISOtech + 'int': '∫', # integral, U+222B ISOtech + 'iota': 'ι', # greek small letter iota, U+03B9 ISOgrk3 + 'iquest': '\277', # inverted question mark = turned question mark, U+00BF ISOnum + 'isin': '∈', # element of, U+2208 ISOtech + 'iuml': '\357', # latin small letter i with diaeresis, U+00EF ISOlat1 + 'kappa': 'κ', # greek small letter kappa, U+03BA ISOgrk3 + 'lArr': '⇐', # leftwards double arrow, U+21D0 
ISOtech + 'lambda': 'λ', # greek small letter lambda, U+03BB ISOgrk3 + 'lang': '〈', # left-pointing angle bracket = bra, U+2329 ISOtech + 'laquo': '\253', # left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum + 'larr': '←', # leftwards arrow, U+2190 ISOnum + 'lceil': '⌈', # left ceiling = apl upstile, U+2308 ISOamsc + 'ldquo': '“', # left double quotation mark, U+201C ISOnum + 'le': '≤', # less-than or equal to, U+2264 ISOtech + 'lfloor': '⌊', # left floor = apl downstile, U+230A ISOamsc + 'lowast': '∗', # asterisk operator, U+2217 ISOtech + 'loz': '◊', # lozenge, U+25CA ISOpub + 'lrm': '‎', # left-to-right mark, U+200E NEW RFC 2070 + 'lsaquo': '‹', # single left-pointing angle quotation mark, U+2039 ISO proposed + 'lsquo': '‘', # left single quotation mark, U+2018 ISOnum + 'lt': '\74', # less-than sign, U+003C ISOnum + 'macr': '\257', # macron = spacing macron = overline = APL overbar, U+00AF ISOdia + 'mdash': '—', # em dash, U+2014 ISOpub + 'micro': '\265', # micro sign, U+00B5 ISOnum + 'middot': '\267', # middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum + 'minus': '−', # minus sign, U+2212 ISOtech + 'mu': 'μ', # greek small letter mu, U+03BC ISOgrk3 + 'nabla': '∇', # nabla = backward difference, U+2207 ISOtech + 'nbsp': '\240', # no-break space = non-breaking space, U+00A0 ISOnum + 'ndash': '–', # en dash, U+2013 ISOpub + 'ne': '≠', # not equal to, U+2260 ISOtech + 'ni': '∋', # contains as member, U+220B ISOtech + 'not': '\254', # not sign, U+00AC ISOnum + 'notin': '∉', # not an element of, U+2209 ISOtech + 'nsub': '⊄', # not a subset of, U+2284 ISOamsn + 'ntilde': '\361', # latin small letter n with tilde, U+00F1 ISOlat1 + 'nu': 'ν', # greek small letter nu, U+03BD ISOgrk3 + 'oacute': '\363', # latin small letter o with acute, U+00F3 ISOlat1 + 'ocirc': '\364', # latin small letter o with circumflex, U+00F4 ISOlat1 + 'oelig': 'œ', # latin small ligature oe, U+0153 ISOlat2 + 'ograve': '\362', # latin small letter o with 
grave, U+00F2 ISOlat1 + 'oline': '‾', # overline = spacing overscore, U+203E NEW + 'omega': 'ω', # greek small letter omega, U+03C9 ISOgrk3 + 'omicron': 'ο', # greek small letter omicron, U+03BF NEW + 'oplus': '⊕', # circled plus = direct sum, U+2295 ISOamsb + 'or': '∨', # logical or = vee, U+2228 ISOtech + 'ordf': '\252', # feminine ordinal indicator, U+00AA ISOnum + 'ordm': '\272', # masculine ordinal indicator, U+00BA ISOnum + 'oslash': '\370', # latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1 + 'otilde': '\365', # latin small letter o with tilde, U+00F5 ISOlat1 + 'otimes': '⊗', # circled times = vector product, U+2297 ISOamsb + 'ouml': '\366', # latin small letter o with diaeresis, U+00F6 ISOlat1 + 'para': '\266', # pilcrow sign = paragraph sign, U+00B6 ISOnum + 'part': '∂', # partial differential, U+2202 ISOtech + 'permil': '‰', # per mille sign, U+2030 ISOtech + 'perp': '⊥', # up tack = orthogonal to = perpendicular, U+22A5 ISOtech + 'phi': 'φ', # greek small letter phi, U+03C6 ISOgrk3 + 'pi': 'π', # greek small letter pi, U+03C0 ISOgrk3 + 'piv': 'ϖ', # greek pi symbol, U+03D6 ISOgrk3 + 'plusmn': '\261', # plus-minus sign = plus-or-minus sign, U+00B1 ISOnum + 'pound': '\243', # pound sign, U+00A3 ISOnum + 'prime': '′', # prime = minutes = feet, U+2032 ISOtech + 'prod': '∏', # n-ary product = product sign, U+220F ISOamsb + 'prop': '∝', # proportional to, U+221D ISOtech + 'psi': 'ψ', # greek small letter psi, U+03C8 ISOgrk3 + 'quot': '\42', # quotation mark = APL quote, U+0022 ISOnum + 'rArr': '⇒', # rightwards double arrow, U+21D2 ISOtech + 'radic': '√', # square root = radical sign, U+221A ISOtech + 'rang': '〉', # right-pointing angle bracket = ket, U+232A ISOtech + 'raquo': '\273', # right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum + 'rarr': '→', # rightwards arrow, U+2192 ISOnum + 'rceil': '⌉', # right ceiling, U+2309 ISOamsc + 'rdquo': '”', # right double quotation mark, U+201D ISOnum + 
'real': 'ℜ', # blackletter capital R = real part symbol, U+211C ISOamso + 'reg': '\256', # registered sign = registered trade mark sign, U+00AE ISOnum + 'rfloor': '⌋', # right floor, U+230B ISOamsc + 'rho': 'ρ', # greek small letter rho, U+03C1 ISOgrk3 + 'rlm': '‏', # right-to-left mark, U+200F NEW RFC 2070 + 'rsaquo': '›', # single right-pointing angle quotation mark, U+203A ISO proposed + 'rsquo': '’', # right single quotation mark, U+2019 ISOnum + 'sbquo': '‚', # single low-9 quotation mark, U+201A NEW + 'scaron': 'š', # latin small letter s with caron, U+0161 ISOlat2 + 'sdot': '⋅', # dot operator, U+22C5 ISOamsb + 'sect': '\247', # section sign, U+00A7 ISOnum + 'shy': '\255', # soft hyphen = discretionary hyphen, U+00AD ISOnum + 'sigma': 'σ', # greek small letter sigma, U+03C3 ISOgrk3 + 'sigmaf': 'ς', # greek small letter final sigma, U+03C2 ISOgrk3 + 'sim': '∼', # tilde operator = varies with = similar to, U+223C ISOtech + 'spades': '♠', # black spade suit, U+2660 ISOpub + 'sub': '⊂', # subset of, U+2282 ISOtech + 'sube': '⊆', # subset of or equal to, U+2286 ISOtech + 'sum': '∑', # n-ary sumation, U+2211 ISOamsb + 'sup': '⊃', # superset of, U+2283 ISOtech + 'sup1': '\271', # superscript one = superscript digit one, U+00B9 ISOnum + 'sup2': '\262', # superscript two = superscript digit two = squared, U+00B2 ISOnum + 'sup3': '\263', # superscript three = superscript digit three = cubed, U+00B3 ISOnum + 'supe': '⊇', # superset of or equal to, U+2287 ISOtech + 'szlig': '\337', # latin small letter sharp s = ess-zed, U+00DF ISOlat1 + 'tau': 'τ', # greek small letter tau, U+03C4 ISOgrk3 + 'there4': '∴', # therefore, U+2234 ISOtech + 'theta': 'θ', # greek small letter theta, U+03B8 ISOgrk3 + 'thetasym': 'ϑ', # greek small letter theta symbol, U+03D1 NEW + 'thinsp': ' ', # thin space, U+2009 ISOpub + 'thorn': '\376', # latin small letter thorn with, U+00FE ISOlat1 + 'tilde': '˜', # small tilde, U+02DC ISOdia + 'times': '\327', # multiplication sign, U+00D7 ISOnum + 
'trade': '™', # trade mark sign, U+2122 ISOnum + 'uArr': '⇑', # upwards double arrow, U+21D1 ISOamsa + 'uacute': '\372', # latin small letter u with acute, U+00FA ISOlat1 + 'uarr': '↑', # upwards arrow, U+2191 ISOnum + 'ucirc': '\373', # latin small letter u with circumflex, U+00FB ISOlat1 + 'ugrave': '\371', # latin small letter u with grave, U+00F9 ISOlat1 + 'uml': '\250', # diaeresis = spacing diaeresis, U+00A8 ISOdia + 'upsih': 'ϒ', # greek upsilon with hook symbol, U+03D2 NEW + 'upsilon': 'υ', # greek small letter upsilon, U+03C5 ISOgrk3 + 'uuml': '\374', # latin small letter u with diaeresis, U+00FC ISOlat1 + 'weierp': '℘', # script capital P = power set = Weierstrass p, U+2118 ISOamso + 'xi': 'ξ', # greek small letter xi, U+03BE ISOgrk3 + 'yacute': '\375', # latin small letter y with acute, U+00FD ISOlat1 + 'yen': '\245', # yen sign = yuan sign, U+00A5 ISOnum + 'yuml': '\377', # latin small letter y with diaeresis, U+00FF ISOlat1 + 'zeta': 'ζ', # greek small letter zeta, U+03B6 ISOgrk3 + 'zwj': '‍', # zero width joiner, U+200D NEW RFC 2070 + 'zwnj': '‌', # zero width non-joiner, U+200C NEW RFC 2070 + } diff --git a/Lib/dos-8x3/linecach.py b/Lib/dos-8x3/linecach.py index 7de373f..bca40b2 100755 --- a/Lib/dos-8x3/linecach.py +++ b/Lib/dos-8x3/linecach.py @@ -1,18 +1,20 @@ -# Cache lines from files. -# This is intended to read lines from modules imported -- hence if a filename -# is not found, it will look down the module search path for a file by -# that name. +"""Cache lines from files. + +This is intended to read lines from modules imported -- hence if a filename +is not found, it will look down the module search path for a file by +that name. 
+""" import sys import os from stat import * def getline(filename, lineno): - lines = getlines(filename) - if 1 <= lineno <= len(lines): - return lines[lineno-1] - else: - return '' + lines = getlines(filename) + if 1 <= lineno <= len(lines): + return lines[lineno-1] + else: + return '' # The cache @@ -20,71 +22,71 @@ def getline(filename, lineno): cache = {} # The cache -# Clear the cache entirely - def clearcache(): - global cache - cache = {} + """Clear the cache entirely.""" + global cache + cache = {} -# Get the lines for a file from the cache. -# Update the cache if it doesn't contain an entry for this file already. def getlines(filename): - if cache.has_key(filename): - return cache[filename][2] - else: - return updatecache(filename) + """Get the lines for a file from the cache. + Update the cache if it doesn't contain an entry for this file already.""" + if cache.has_key(filename): + return cache[filename][2] + else: + return updatecache(filename) -# Discard cache entries that are out of date. -# (This is not checked upon each call!) def checkcache(): - for filename in cache.keys(): - size, mtime, lines, fullname = cache[filename] - try: - stat = os.stat(fullname) - except os.error: - del cache[filename] - continue - if size <> stat[ST_SIZE] or mtime <> stat[ST_MTIME]: - del cache[filename] + """Discard cache entries that are out of date. + (This is not checked upon each call!)""" + for filename in cache.keys(): + size, mtime, lines, fullname = cache[filename] + try: + stat = os.stat(fullname) + except os.error: + del cache[filename] + continue + if size <> stat[ST_SIZE] or mtime <> stat[ST_MTIME]: + del cache[filename] -# Update a cache entry and return its list of lines. -# If something's wrong, print a message, discard the cache entry, -# and return an empty list. 
def updatecache(filename): - if cache.has_key(filename): - del cache[filename] - if not filename or filename[0] + filename[-1] == '<>': - return [] - fullname = filename - try: - stat = os.stat(fullname) - except os.error, msg: - # Try looking through the module search path - basename = os.path.split(filename)[1] - for dirname in sys.path: - fullname = os.path.join(dirname, basename) - try: - stat = os.stat(fullname) - break - except os.error: - pass - else: - # No luck -## print '*** Cannot stat', filename, ':', msg - return [] - try: - fp = open(fullname, 'r') - lines = fp.readlines() - fp.close() - except IOError, msg: -## print '*** Cannot open', fullname, ':', msg - return [] - size, mtime = stat[ST_SIZE], stat[ST_MTIME] - cache[filename] = size, mtime, lines, fullname - return lines + """Update a cache entry and return its list of lines. + If something's wrong, print a message, discard the cache entry, + and return an empty list.""" + + if cache.has_key(filename): + del cache[filename] + if not filename or filename[0] + filename[-1] == '<>': + return [] + fullname = filename + try: + stat = os.stat(fullname) + except os.error, msg: + # Try looking through the module search path + basename = os.path.split(filename)[1] + for dirname in sys.path: + fullname = os.path.join(dirname, basename) + try: + stat = os.stat(fullname) + break + except os.error: + pass + else: + # No luck +## print '*** Cannot stat', filename, ':', msg + return [] + try: + fp = open(fullname, 'r') + lines = fp.readlines() + fp.close() + except IOError, msg: +## print '*** Cannot open', fullname, ':', msg + return [] + size, mtime = stat[ST_SIZE], stat[ST_MTIME] + cache[filename] = size, mtime, lines, fullname + return lines diff --git a/Lib/dos-8x3/macurl2p.py b/Lib/dos-8x3/macurl2p.py index 7d273bc..c971eda 100755 --- a/Lib/dos-8x3/macurl2p.py +++ b/Lib/dos-8x3/macurl2p.py @@ -1,5 +1,6 @@ -"""Mac specific module for conversion between pathnames and URLs. 
-Do not import directly, use urllib instead.""" +"""Macintosh-specific module for conversion between pathnames and URLs. + +Do not import directly; use urllib instead.""" import string import urllib @@ -13,6 +14,11 @@ def url2pathname(pathname): tp = urllib.splittype(pathname)[0] if tp and tp <> 'file': raise RuntimeError, 'Cannot convert non-local URL to pathname' + # Turn starting /// into /, an empty hostname means current host + if pathname[:3] == '///': + pathname = pathname[2:] + elif pathname[:2] == '//': + raise RuntimeError, 'Cannot convert non-local URL to pathname' components = string.split(pathname, '/') # Remove . and embedded .. i = 0 diff --git a/Lib/dos-8x3/mimepars.py b/Lib/dos-8x3/mimepars.py new file mode 100644 index 0000000..46fe9eb --- /dev/null +++ b/Lib/dos-8x3/mimepars.py @@ -0,0 +1,246 @@ +"""Generic MIME parser. + +Classes: + + MimeParser - Generic MIME parser. + +Exceptions: + + MimeError - Exception raised by MimeParser class. + +XXX To do: + +- Content-transfer-encoding issues +- Use Content-length header in rawbody()? +- Cache parts instead of reparsing each time +- The message strings in exceptions could use some work + +""" + +from types import * # Python types, not MIME types :-) +import string +import regex +import SubFile +import mimetools + + +MimeError = "MimeParser.MimeError" # Exception raised by this class + + +class MimeParser: + + """Generic MIME parser. + + This requires a seekable file. 
+ + """ + + def __init__(self, fp): + """Constructor: store the file pointer and parse the headers.""" + self._fp = fp + self._start = fp.tell() + self._headers = h = mimetools.Message(fp) + self._bodystart = fp.tell() + self._multipart = h.getmaintype() == 'multipart' + + def multipart(self): + """Return whether this is a multipart message.""" + return self._multipart + + def headers(self): + """Return the headers of the MIME message, as a Message object.""" + return self._headers + + def rawbody(self): + """Return the raw body of the MIME message, as a file-like object. + + This is a fairly low-level interface -- for a multipart + message, you'd have to parse the body yourself, and it doesn't + translate the Content-transfer-encoding. + + """ + # XXX Use Content-length to set end if it exists? + return SubFile.SubFile(self._fp, self._bodystart) + + def body(self): + """Return the body of a 1-part MIME message, as a file-like object. + + This should interpret the Content-transfer-encoding, if any + (XXX currently it doesn't). + + """ + if self._multipart: + raise MimeError, "body() only works for 1-part messages" + return self.rawbody() + + _re_content_length = regex.compile('content-length:[ \t]*\([0-9]+\)', + regex.casefold) + + def rawparts(self): + """Return the raw body parts of a multipart MIME message. + + This returns a list of SubFile() objects corresponding to the + parts. Note that the phantom part before the first separator + is returned too, as list item 0. If the final part is not + followed by a terminator, it is ignored, and this error is not + reported. (XXX: the error should be raised). 
+ + """ + if not self._multipart: + raise MimeError, "[raw]parts() only works for multipart messages" + h = self._headers + separator = h.getparam('boundary') + if not separator: + raise MimeError, "multipart boundary not specified" + separator = "--" + separator + terminator = separator + "--" + ns = len(separator) + list = [] + f = self._fp + start = f.tell() + clength = -1 + bodystart = -1 + inheaders = 0 + while 1: + end = f.tell() + line = f.readline() + if not line: + break + if line[:2] != "--" or line[:ns] != separator: + if inheaders: + re = self._re_content_length + if re.match(line) > 0: + try: + clength = string.atoi(re.group(1)) + except string.atoi_error: + pass + if not string.strip(line): + inheaders = 0 + bodystart = f.tell() + if clength > 0: + # Skip binary data + f.read(clength) + continue + line = string.strip(line) + if line == terminator or line == separator: + if clength >= 0: + # The Content-length header determines the subfile size + end = bodystart + clength + else: + # The final newline is not part of the content + end = end-1 + list.append(SubFile.SubFile(f, start, end)) + start = f.tell() + clength = -1 + inheaders = 1 + if line == terminator: + break + return list + + def parts(self): + """Return the parsed body parts of a multipart MIME message. + + This returns a list of MimeParser() instances corresponding to + the parts. The phantom part before the first separator is not + included. + + """ + return map(MimeParser, self.rawparts()[1:]) + + def getsubpartbyposition(self, indices): + part = self + for i in indices: + part = part.parts()[i] + return part + + def getsubpartbyid(self, id): + h = self._headers + cid = h.getheader('content-id') + if cid and cid == id: + return self + if self._multipart: + for part in self.parts(): + parser = MimeParser(part) + hit = parser.getsubpartbyid(id) + if hit: + return hit + return None + + def index(self): + """Return an index of the MIME file. 
+ + This parses the entire file and returns index information + about it, in the form of a tuple + + (ctype, headers, body) + + where 'ctype' is the content type string of the message + (e.g. `text/plain' or `multipart/mixed') and 'headers' is a + Message instance containing the message headers (which should + be treated as read-only). + + The 'body' item depends on the content type: + + - If it is an atomic message (anything except for content type + multipart/*), it is the file-like object returned by + self.body(). + + - For a content type of multipart/*, it is the list of + MimeParser() objects returned by self.parts(). + + """ + if self._multipart: + body = self.parts() + else: + body = self.body() + return self._headers.gettype(), self._headers, body + + +def _show(parser, level=0): + """Helper for _test().""" + ctype, headers, body = parser.index() + print ctype, + if type(body) == ListType: + nparts = len(body) + print "(%d part%s):" % (nparts, nparts != 1 and "s" or "") + n = 0 + for part in body: + n = n+1 + print "%*d." % (4*level+2, n), + _show(part, level+1) + else: + bodylines = body.readlines() + print "(%d header lines, %d body lines)" % ( + len(headers.headers), len(bodylines)) + for line in headers.headers + ['\n'] + bodylines: + if line[-1:] == '\n': line = line[:-1] + print " "*level + line + +def _test(args = None): + """Test program invoked when run as a script. + + When a filename argument is specified, it reads from that file. + When no arguments are present, it defaults to 'testkp.txt' if it + exists, else it defaults to stdin. 
+ + """ + if not args: + import sys + args = sys.argv[1:] + if args: + fn = args[0] + else: + import os + fn = 'testkp.txt' + if not os.path.exists(fn): + fn = '-' + if fn == '-': + fp = sys.stdin + else: + fp = open(fn) + mp = MimeParser(fp) + _show(mp) + +if __name__ == '__main__': + import sys + _test() diff --git a/Lib/dos-8x3/mimetool.py b/Lib/dos-8x3/mimetool.py index fc72c79..27996e0 100755 --- a/Lib/dos-8x3/mimetool.py +++ b/Lib/dos-8x3/mimetool.py @@ -1,4 +1,4 @@ -# Various tools used by MIME-reading or MIME-writing programs. +"""Various tools used by MIME-reading or MIME-writing programs.""" import os @@ -7,10 +7,9 @@ import string import tempfile -# A derived class of rfc822.Message that knows about MIME headers and -# contains some hooks for decoding encoded and multipart messages. - class Message(rfc822.Message): + """A derived class of rfc822.Message that knows about MIME headers and + contains some hooks for decoding encoded and multipart messages.""" def __init__(self, fp, seekable = 1): rfc822.Message.__init__(self, fp, seekable) @@ -96,17 +95,17 @@ class Message(rfc822.Message): # ----------------- -# Return a random string usable as a multipart boundary. -# The method used is so that it is *very* unlikely that the same -# string of characters will every occur again in the Universe, -# so the caller needn't check the data it is packing for the -# occurrence of the boundary. -# -# The boundary contains dots so you have to quote it in the header. - _prefix = None def choose_boundary(): + """Return a random string usable as a multipart boundary. + The method used is so that it is *very* unlikely that the same + string of characters will every occur again in the Universe, + so the caller needn't check the data it is packing for the + occurrence of the boundary. 
+ + The boundary contains dots so you have to quote it in the header.""" + global _prefix import time import random @@ -131,6 +130,7 @@ def choose_boundary(): # Subroutines for decoding some common content-transfer-types def decode(input, output, encoding): + """Decode common content-transfer-encodings (base64, quopri, uuencode).""" if encoding == 'base64': import base64 return base64.decode(input, output) @@ -140,6 +140,8 @@ def decode(input, output, encoding): if encoding in ('uuencode', 'x-uuencode', 'uue', 'x-uue'): import uu return uu.decode(input, output) + if encoding in ('7bit', '8bit'): + output.write(input.read()) if decodetab.has_key(encoding): pipethrough(input, decodetab[encoding], output) else: @@ -147,6 +149,7 @@ def decode(input, output, encoding): 'unknown Content-Transfer-Encoding: %s' % encoding def encode(input, output, encoding): + """Encode common content-transfer-encodings (base64, quopri, uuencode).""" if encoding == 'base64': import base64 return base64.encode(input, output) @@ -156,6 +159,8 @@ def encode(input, output, encoding): if encoding in ('uuencode', 'x-uuencode', 'uue', 'x-uue'): import uu return uu.encode(input, output) + if encoding in ('7bit', '8bit'): + output.write(input.read()) if encodetab.has_key(encoding): pipethrough(input, encodetab[encoding], output) else: diff --git a/Lib/dos-8x3/mimetype.py b/Lib/dos-8x3/mimetype.py index f15160f..9dc3645 100644 --- a/Lib/dos-8x3/mimetype.py +++ b/Lib/dos-8x3/mimetype.py @@ -30,8 +30,8 @@ import urllib knownfiles = [ "/usr/local/etc/httpd/conf/mime.types", "/usr/local/lib/netscape/mime.types", - "/usr/local/etc/httpd/conf/mime.types", # Apache 1.2 - "/usr/local/etc/mime.types", # Apache 1.3 + "/usr/local/etc/httpd/conf/mime.types", # Apache 1.2 + "/usr/local/etc/mime.types", # Apache 1.3 ] inited = 0 @@ -56,24 +56,24 @@ def guess_type(url): init() scheme, url = urllib.splittype(url) if scheme == 'data': - # syntax of data URLs: - # dataurl := "data:" [ mediatype ] [ ";base64" ] "," 
data - # mediatype := [ type "/" subtype ] *( ";" parameter ) - # data := *urlchar - # parameter := attribute "=" value - # type/subtype defaults to "text/plain" - comma = string.find(url, ',') - if comma < 0: - # bad data URL - return None, None - semi = string.find(url, ';', 0, comma) - if semi >= 0: - type = url[:semi] - else: - type = url[:comma] - if '=' in type or '/' not in type: - type = 'text/plain' - return type, None # never compressed, so encoding is None + # syntax of data URLs: + # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data + # mediatype := [ type "/" subtype ] *( ";" parameter ) + # data := *urlchar + # parameter := attribute "=" value + # type/subtype defaults to "text/plain" + comma = string.find(url, ',') + if comma < 0: + # bad data URL + return None, None + semi = string.find(url, ';', 0, comma) + if semi >= 0: + type = url[:semi] + else: + type = url[:comma] + if '=' in type or '/' not in type: + type = 'text/plain' + return type, None # never compressed, so encoding is None base, ext = posixpath.splitext(url) while suffix_map.has_key(ext): base, ext = posixpath.splitext(base + suffix_map[ext]) @@ -175,6 +175,7 @@ types_map = { '.jpe': 'image/jpeg', '.jpeg': 'image/jpeg', '.jpg': 'image/jpeg', + '.js': 'application/x-javascript', '.latex': 'application/x-latex', '.man': 'application/x-troff-man', '.me': 'application/x-troff-me', diff --git a/Lib/dos-8x3/multifil.py b/Lib/dos-8x3/multifil.py index ce84087..977b92f 100755 --- a/Lib/dos-8x3/multifil.py +++ b/Lib/dos-8x3/multifil.py @@ -1,28 +1,31 @@ -# A class that makes each part of a multipart message "feel" like an -# ordinary file, as long as you use fp.readline(). Allows recursive -# use, for nested multipart messages. Probably best used together -# with module mimetools. -# -# Suggested use: -# -# real_fp = open(...) 
-# fp = MultiFile(real_fp) -# -# "read some lines from fp" -# fp.push(separator) -# while 1: -# "read lines from fp until it returns an empty string" (A) -# if not fp.next(): break -# fp.pop() -# "read remaining lines from fp until it returns an empty string" -# -# The latter sequence may be used recursively at (A). -# It is also allowed to use multiple push()...pop() sequences. -# -# If seekable is given as 0, the class code will not do the bookeeping -# it normally attempts in order to make seeks relative to the beginning of the -# current file part. This may be useful when using MultiFile with a non- -# seekable stream object. +"""A readline()-style interface to the parts of a multipart message. + +The MultiFile class makes each part of a multipart message "feel" like +an ordinary file, as long as you use fp.readline(). Allows recursive +use, for nested multipart messages. Probably best used together +with module mimetools. + +Suggested use: + +real_fp = open(...) +fp = MultiFile(real_fp) + +"read some lines from fp" +fp.push(separator) +while 1: + "read lines from fp until it returns an empty string" (A) + if not fp.next(): break +fp.pop() +"read remaining lines from fp until it returns an empty string" + +The latter sequence may be used recursively at (A). +It is also allowed to use multiple push()...pop() sequences. + +If seekable is given as 0, the class code will not do the bookeeping +it normally attempts in order to make seeks relative to the beginning of the +current file part. This may be useful when using MultiFile with a non- +seekable stream object. 
+""" import sys import string @@ -30,9 +33,9 @@ import string Error = 'multifile.Error' class MultiFile: - # + seekable = 0 - # + def __init__(self, fp, seekable=1): self.fp = fp self.stack = [] # Grows down @@ -42,12 +45,12 @@ class MultiFile: self.seekable = 1 self.start = self.fp.tell() self.posstack = [] # Grows down - # + def tell(self): if self.level > 0: return self.lastpos return self.fp.tell() - self.start - # + def seek(self, pos, whence=0): here = self.tell() if whence: @@ -64,7 +67,7 @@ class MultiFile: self.fp.seek(pos + self.start) self.level = 0 self.last = 0 - # + def readline(self): if self.level > 0: return '' @@ -105,7 +108,7 @@ class MultiFile: if self.level > 1: raise Error,'Missing endmarker in MultiFile.readline()' return '' - # + def readlines(self): list = [] while 1: @@ -113,10 +116,10 @@ class MultiFile: if not line: break list.append(line) return list - # + def read(self): # Note: no size argument -- read until EOF only! return string.joinfields(self.readlines(), '') - # + def next(self): while self.readline(): pass if self.level > 1 or self.last: @@ -126,7 +129,7 @@ class MultiFile: if self.seekable: self.start = self.fp.tell() return 1 - # + def push(self, sep): if self.level > 0: raise Error, 'bad MultiFile.push() call' @@ -134,7 +137,7 @@ class MultiFile: if self.seekable: self.posstack.insert(0, self.start) self.start = self.fp.tell() - # + def pop(self): if self.stack == []: raise Error, 'bad MultiFile.pop() call' @@ -149,12 +152,12 @@ class MultiFile: del self.posstack[0] if self.level > 0: self.lastpos = abslastpos - self.start - # + def is_data(self, line): return line[:2] <> '--' - # + def section_divider(self, str): return "--" + str - # + def end_marker(self, str): return "--" + str + "--" diff --git a/Lib/dos-8x3/nturl2pa.py b/Lib/dos-8x3/nturl2pa.py index 8c02049..ae261ba 100755 --- a/Lib/dos-8x3/nturl2pa.py +++ b/Lib/dos-8x3/nturl2pa.py @@ -1,6 +1,4 @@ -# -# nturl2path convert a NT pathname to a file URL and -# vice versa 
+"""Convert a NT pathname to a file URL and vice versa.""" def url2pathname(url): """ Convert a URL to a DOS path... @@ -34,7 +32,6 @@ def url2pathname(url): return path def pathname2url(p): - """ Convert a DOS path name to a file url... C:\foo\bar\spam.foo diff --git a/Lib/dos-8x3/posixfil.py b/Lib/dos-8x3/posixfil.py index 2cd4168..2db37e0 100755 --- a/Lib/dos-8x3/posixfil.py +++ b/Lib/dos-8x3/posixfil.py @@ -1,64 +1,61 @@ -# -# Start of posixfile.py -# - -# -# Extended file operations -# -# f = posixfile.open(filename, [mode, [bufsize]]) -# will create a new posixfile object -# -# f = posixfile.fileopen(fileobject) -# will create a posixfile object from a builtin file object -# -# f.file() -# will return the original builtin file object -# -# f.dup() -# will return a new file object based on a new filedescriptor -# -# f.dup2(fd) -# will return a new file object based on the given filedescriptor -# -# f.flags(mode) -# will turn on the associated flag (merge) -# mode can contain the following characters: -# -# (character representing a flag) -# a append only flag -# c close on exec flag -# n no delay flag -# s synchronization flag -# (modifiers) -# ! turn flags 'off' instead of default 'on' -# = copy flags 'as is' instead of default 'merge' -# ? return a string in which the characters represent the flags -# that are set -# -# note: - the '!' and '=' modifiers are mutually exclusive. -# - the '?' modifier will return the status of the flags after they -# have been changed by other characters in the mode string -# -# f.lock(mode [, len [, start [, whence]]]) -# will (un)lock a region -# mode can contain the following characters: -# -# (character representing type of lock) -# u unlock -# r read lock -# w write lock -# (modifiers) -# | wait until the lock can be granted -# ? return the first lock conflicting with the requested lock -# or 'None' if there is no conflict. 
The lock returned is in the -# format (mode, len, start, whence, pid) where mode is a -# character representing the type of lock ('r' or 'w') -# -# note: - the '?' modifier prevents a region from being locked; it is -# query only -# +"""Extended file operations available in POSIX. + +f = posixfile.open(filename, [mode, [bufsize]]) + will create a new posixfile object + +f = posixfile.fileopen(fileobject) + will create a posixfile object from a builtin file object + +f.file() + will return the original builtin file object + +f.dup() + will return a new file object based on a new filedescriptor + +f.dup2(fd) + will return a new file object based on the given filedescriptor + +f.flags(mode) + will turn on the associated flag (merge) + mode can contain the following characters: + + (character representing a flag) + a append only flag + c close on exec flag + n no delay flag + s synchronization flag + (modifiers) + ! turn flags 'off' instead of default 'on' + = copy flags 'as is' instead of default 'merge' + ? return a string in which the characters represent the flags + that are set + + note: - the '!' and '=' modifiers are mutually exclusive. + - the '?' modifier will return the status of the flags after they + have been changed by other characters in the mode string + +f.lock(mode [, len [, start [, whence]]]) + will (un)lock a region + mode can contain the following characters: + + (character representing type of lock) + u unlock + r read lock + w write lock + (modifiers) + | wait until the lock can be granted + ? return the first lock conflicting with the requested lock + or 'None' if there is no conflict. The lock returned is in the + format (mode, len, start, whence, pid) where mode is a + character representing the type of lock ('r' or 'w') + + note: - the '?' 
modifier prevents a region from being locked; it is + query only +""" class _posixfile_: + """File wrapper class that provides extra POSIX file routines.""" + states = ['open', 'closed'] # @@ -178,6 +175,7 @@ class _posixfile_: # additions for AIX by Vladimir.Marangozov@imag.fr import sys, os if sys.platform in ('netbsd1', + 'openbsd2', 'freebsd2', 'freebsd3', 'bsdos2', 'bsdos3', 'bsdos4'): flock = struct.pack('lxxxxlxxxxlhh', \ @@ -193,6 +191,7 @@ class _posixfile_: if '?' in how: if sys.platform in ('netbsd1', + 'openbsd2', 'freebsd2', 'freebsd3', 'bsdos2', 'bsdos3', 'bsdos4'): l_start, l_len, l_pid, l_type, l_whence = \ @@ -213,13 +212,12 @@ class _posixfile_: else: return 'w', l_len, l_start, l_whence, l_pid -# -# Public routine to obtain a posixfile object -# def open(name, mode='r', bufsize=-1): + """Public routine to open a file as a posixfile object.""" return _posixfile_().open(name, mode, bufsize) def fileopen(file): + """Public routine to get a posixfile object from a Python file object.""" return _posixfile_().fileopen(file) # diff --git a/Lib/dos-8x3/posixpat.py b/Lib/dos-8x3/posixpat.py index 36edc80..a603e9e 100755 --- a/Lib/dos-8x3/posixpat.py +++ b/Lib/dos-8x3/posixpat.py @@ -1,13 +1,13 @@ -# Module 'posixpath' -- common operations on Posix pathnames. -# Some of this can actually be useful on non-Posix systems too, e.g. -# for manipulation of the pathname component of URLs. -# The "os.path" name is an alias for this module on Posix systems; -# on other systems (e.g. Mac, Windows), os.path provides the same -# operations in a manner specific to that platform, and is an alias -# to another module (e.g. macpath, ntpath). -"""Common pathname manipulations, Posix version. -Instead of importing this module -directly, import os and refer to this module as os.path. +"""Common operations on Posix pathnames. + +Instead of importing this module directly, import os and refer to +this module as os.path. 
The "os.path" name is an alias for this +module on Posix systems; on other systems (e.g. Mac, Windows), +os.path provides the same operations in a manner specific to that +platform, and is an alias to another module (e.g. macpath, ntpath). + +Some of this can actually be useful on non-Posix systems too, e.g. +for manipulation of the pathname component of URLs. """ import os @@ -143,7 +143,7 @@ def getmtime(filename): def getatime(filename): """Return the last access time of a file, reported by os.stat().""" st = os.stat(filename) - return st[stat.ST_MTIME] + return st[stat.ST_ATIME] # Is a path a symbolic link? @@ -254,7 +254,7 @@ def ismount(path): # or to impose a different order of visiting. def walk(top, func, arg): - """walk(top,func,args) calls func(arg, d, files) for each directory "d" + """walk(top,func,arg) calls func(arg, d, files) for each directory "d" in the tree rooted at "top" (including "top" itself). "files" is a list of all the files and subdirs in directory "d". """ @@ -263,11 +263,10 @@ of all the files and subdirs in directory "d". except os.error: return func(arg, top, names) - exceptions = ('.', '..') for name in names: - if name not in exceptions: name = join(top, name) - if isdir(name) and not islink(name): + st = os.lstat(name) + if stat.S_ISDIR(st[stat.ST_MODE]): walk(name, func, arg) @@ -369,8 +368,8 @@ def normpath(path): return slashes + string.joinfields(comps, '/') -# Return an absolute path. def abspath(path): + """Return an absolute path.""" if not isabs(path): path = join(os.getcwd(), path) return normpath(path) diff --git a/Lib/dos-8x3/py_compi.py b/Lib/dos-8x3/py_compi.py index e1d0d70..c54d61b 100755 --- a/Lib/dos-8x3/py_compi.py +++ b/Lib/dos-8x3/py_compi.py @@ -7,7 +7,7 @@ import imp MAGIC = imp.get_magic() def wr_long(f, x): - "Internal; write a 32-bit int to a file in little-endian order." 
+ """Internal; write a 32-bit int to a file in little-endian order.""" f.write(chr( x & 0xff)) f.write(chr((x >> 8) & 0xff)) f.write(chr((x >> 16) & 0xff)) diff --git a/Lib/dos-8x3/queue.py b/Lib/dos-8x3/queue.py index 79c4880..cb04006 100755 --- a/Lib/dos-8x3/queue.py +++ b/Lib/dos-8x3/queue.py @@ -1,4 +1,4 @@ -# A multi-producer, multi-consumer queue. +"""A multi-producer, multi-consumer queue.""" # define this exception to be compatible with Python 1.5's class # exceptions, but also when -X option is used. @@ -15,7 +15,7 @@ except TypeError: Full = 'Queue.Full' class Queue: - def __init__(self, maxsize): + def __init__(self, maxsize=0): """Initialize a queue object with a given maximum size. If maxsize is <= 0, the queue size is infinite. diff --git a/Lib/dos-8x3/regex_sy.py b/Lib/dos-8x3/regex_sy.py index 8631f42..aab7e7a 100755 --- a/Lib/dos-8x3/regex_sy.py +++ b/Lib/dos-8x3/regex_sy.py @@ -1,5 +1,11 @@ -# These bits are passed to regex.set_syntax() to choose among -# alternative regexp syntaxes. +"""Constants for selecting regexp syntaxes for the obsolete regex module. + +This module is only for backward compatibility. "regex" has now +been replaced by the new regular expression module, "re". + +These bits are passed to regex.set_syntax() to choose among +alternative regexp syntaxes. +""" # 1 means plain parentheses serve as grouping, and backslash # parentheses are needed for literal searching. diff --git a/Lib/dos-8x3/rfc822-n.py b/Lib/dos-8x3/rfc822-n.py new file mode 100644 index 0000000..ef7ab43 --- /dev/null +++ b/Lib/dos-8x3/rfc822-n.py @@ -0,0 +1,946 @@ +"""RFC-822 message manipulation class. + +XXX This is only a very rough sketch of a full RFC-822 parser; +in particular the tokenizing of addresses does not adhere to all the +quoting rules. + +Directions for use: + +To create a Message object: first open a file, e.g.: + fp = open(file, 'r') +You can use any other legal way of getting an open file object, e.g. use +sys.stdin or call os.popen(). 
+Then pass the open file object to the Message() constructor: + m = Message(fp) + +This class can work with any input object that supports a readline +method. If the input object has seek and tell capability, the +rewindbody method will work; also illegal lines will be pushed back +onto the input stream. If the input object lacks seek but has an +`unread' method that can push back a line of input, Message will use +that to push back illegal lines. Thus this class can be used to parse +messages coming from a buffered stream. + +The optional `seekable' argument is provided as a workaround for +certain stdio libraries in which tell() discards buffered data before +discovering that the lseek() system call doesn't work. For maximum +portability, you should set the seekable argument to zero to prevent +that initial \code{tell} when passing in an unseekable object such as +a a file object created from a socket object. If it is 1 on entry -- +which it is by default -- the tell() method of the open file object is +called once; if this raises an exception, seekable is reset to 0. For +other nonzero values of seekable, this test is not made. + +To get the text of a particular header there are several methods: + str = m.getheader(name) + str = m.getrawheader(name) +where name is the name of the header, e.g. 'Subject'. +The difference is that getheader() strips the leading and trailing +whitespace, while getrawheader() doesn't. Both functions retain +embedded whitespace (including newlines) exactly as they are +specified in the header, and leave the case of the text unchanged. + +For addresses and address lists there are functions + realname, mailaddress = m.getaddr(name) and + list = m.getaddrlist(name) +where the latter returns a list of (realname, mailaddr) tuples. + +There is also a method + time = m.getdate(name) +which parses a Date-like field and returns a time-compatible tuple, +i.e. a tuple such as returned by time.localtime() or accepted by +time.mktime(). 
+ +See the class definition for lower level access methods. + +There are also some utility functions here. +""" +# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com> + +import string +import time + + +_blanklines = ('\r\n', '\n') # Optimization for islast() + + +class Message: + """Represents a single RFC-822-compliant message.""" + + def __init__(self, fp, seekable = 1): + """Initialize the class instance and read the headers.""" + if seekable == 1: + # Exercise tell() to make sure it works + # (and then assume seek() works, too) + try: + fp.tell() + except: + seekable = 0 + else: + seekable = 1 + self.fp = fp + self.seekable = seekable + self.startofheaders = None + self.startofbody = None + # + if self.seekable: + try: + self.startofheaders = self.fp.tell() + except IOError: + self.seekable = 0 + # + self.readheaders() + # + if self.seekable: + try: + self.startofbody = self.fp.tell() + except IOError: + self.seekable = 0 + + def rewindbody(self): + """Rewind the file to the start of the body (if seekable).""" + if not self.seekable: + raise IOError, "unseekable file" + self.fp.seek(self.startofbody) + + def readheaders(self): + """Read header lines. + + Read header lines up to the entirely blank line that + terminates them. The (normally blank) line that ends the + headers is skipped, but not included in the returned list. + If a non-header line ends the headers, (which is an error), + an attempt is made to backspace over it; it is never + included in the returned list. + + The variable self.status is set to the empty string if all + went well, otherwise it is an error message. + The variable self.headers is a completely uninterpreted list + of lines contained in the header (so printing them will + reproduce the header exactly as it appears in the file). 
+ """ + self.dict = {} + self.__gamh_cache = {} + self.__gh_cache = {} + self.unixfrom = '' + self.headers = list = [] + self.status = '' + headerseen = "" + firstline = 1 + startofline = unread = tell = None + if hasattr(self.fp, 'unread'): + unread = self.fp.unread + elif self.seekable: + tell = self.fp.tell + while 1: + if tell: + startofline = tell() + line = self.fp.readline() + if not line: + self.status = 'EOF in headers' + break + # Skip unix From name time lines + if firstline and line[:5] == 'From ': + self.unixfrom = self.unixfrom + line + continue + firstline = 0 + if headerseen and line[0] in ' \t': + # It's a continuation line. + list.append(line) + self.__gamh_cache[headerseen].append(line) + x = string.lstrip( + "%s\n %s" % (self.dict[headerseen], string.strip(line))) + self.dict[headerseen] = x + self.__gh_cache[headerseen][-1] = x + continue + elif self.iscomment(line): + # It's a comment. Ignore it. + continue + elif self.islast(line): + # Note! No pushback here! The delimiter line gets eaten. + break + headerseen = self.isheader(line) + if headerseen: + # It's a legal header line, save it. + list.append(line) + l = self.__gamh_cache.get(headerseen) + if not l: + self.__gamh_cache[headerseen] = l = [] + l.append(line) + x = string.strip(line[len(headerseen)+1:]) + self.dict[headerseen] = x + l = self.__gh_cache.get(headerseen) + if not l: + self.__gh_cache[headerseen] = l = [] + l.append(x) + continue + else: + # It's not a header line; throw it back and stop here. + if not self.dict: + self.status = 'No headers' + else: + self.status = 'Non-header line where header expected' + # Try to undo the read. + if unread: + unread(line) + elif tell: + self.fp.seek(startofline) + else: + self.status = self.status + '; bad seek' + break + + def isheader(self, line): + """Determine whether a given line is a legal header. + + This method should return the header name, suitably canonicalized. 
+ You may override this method in order to use Message parsing + on tagged data in RFC822-like formats with special header formats. + """ + i = string.find(line, ':') + if i > 0: + return string.lower(line[:i]) + else: + return None + + def islast(self, line): + """Determine whether a line is a legal end of RFC-822 headers. + + You may override this method if your application wants + to bend the rules, e.g. to strip trailing whitespace, + or to recognise MH template separators ('--------'). + For convenience (e.g. for code reading from sockets) a + line consisting of \r\n also matches. + """ + return line in _blanklines + + def iscomment(self, line): + """Determine whether a line should be skipped entirely. + + You may override this method in order to use Message parsing + on tagged data in RFC822-like formats that support embedded + comments or free-text data. + """ + return None + + def getallmatchingheaders(self, name, + # speed hack: + lower = string.lower): + """Find all header lines matching a given header name. + + Look through the list of headers and find all lines + matching a given header name (and their continuation + lines). A list of the lines is returned, without + interpretation. If the header does not occur, an + empty list is returned. If the header occurs multiple + times, all occurrences are returned. Case is not + important in the header name. + """ + r = self.__gamh_cache.get(lower(name)) + if r: + return r[:] + return [] + + def getfirstmatchingheader(self, name, + # speed hack: + lower = string.lower): + """Get the first header line matching name. + + This is similar to getallmatchingheaders, but it returns + only the first matching header (and its continuation + lines). + """ + l = self.__gamh_cache.get(lower(name)) + if not l: + return [] + r = [] + for item in l: + if r and item[0] not in " \t": + break + r.append(item) + return r + + def getrawheader(self, name): + """A higher-level interface to getfirstmatchingheader(). 
+ + Return a string containing the literal text of the + header but with the keyword stripped. All leading, + trailing and embedded whitespace is kept in the + string, however. + Return None if the header does not occur. + """ + + list = self.getfirstmatchingheader(name) + if not list: + return None + list[0] = list[0][len(name) + 1:] + return string.joinfields(list, '') + + def getheader(self, name, default=None): + """Get the header value for a name. + + This is the normal interface: it returns a stripped + version of the header value for a given header name, + or None if it doesn't exist. This uses the dictionary + version which finds the *last* such header. + """ + try: + return self.dict[string.lower(name)] + except KeyError: + return default + get = getheader + + def getheaders(self, name, + # speed hack: + lower = string.lower): + """Get all values for a header. + + This returns a list of values for headers given more than once; + each value in the result list is stripped in the same way as the + result of getheader(). If the header is not given, return an + empty list. + """ + r = self.__gh_cache.get(lower(name)) + if r: + return r[:] + return [] + + def getaddr(self, name): + """Get a single address from a header, as a tuple. + + An example return value: + ('Guido van Rossum', 'guido@cwi.nl') + """ + # New, by Ben Escoto + alist = self.getaddrlist(name) + if alist: + return alist[0] + else: + return (None, None) + + def getaddrlist(self, name): + """Get a list of addresses from a header. + + Retrieves a list of addresses from a header, where each address is a + tuple as returned by getaddr(). Scans all named headers, so it works + properly with multiple To: or Cc: headers for example. 
+ + """ + raw = [] + for h in self.getallmatchingheaders(name): + if h[0] in ' \t': + raw.append(h) + else: + if raw: + raw.append(', ') + i = string.find(h, ':') + if i > 0: + addr = h[i+1:] + raw.append(addr) + alladdrs = string.join(raw, '') + a = AddrlistClass(alladdrs) + return a.getaddrlist() + + def getdate(self, name): + """Retrieve a date field from a header. + + Retrieves a date field from the named header, returning + a tuple compatible with time.mktime(). + """ + try: + data = self[name] + except KeyError: + return None + return parsedate(data) + + def getdate_tz(self, name): + """Retrieve a date field from a header as a 10-tuple. + + The first 9 elements make up a tuple compatible with + time.mktime(), and the 10th is the offset of the poster's + time zone from GMT/UTC. + """ + try: + data = self[name] + except KeyError: + return None + return parsedate_tz(data) + + + # Access as a dictionary (only finds *last* header of each type): + + def __len__(self): + """Get the number of headers in a message.""" + return len(self.dict) + + def __getitem__(self, name): + """Get a specific header, as from a dictionary.""" + return self.dict[string.lower(name)] + + def __setitem__(self, name, value): + """Set the value of a header. + + Note: This is not a perfect inversion of __getitem__, because + any changed headers get stuck at the end of the raw-headers list + rather than where the altered header was. 
+ """ + del self[name] # Won't fail if it doesn't exist + self.dict[string.lower(name)] = value + text = name + ": " + value + lines = string.split(text, "\n") + for line in lines: + self.headers.append(line + "\n") + + def __delitem__(self, name): + """Delete all occurrences of a specific header, if it is present.""" + name = string.lower(name) + if not self.dict.has_key(name): + return + del self.dict[name] + name = name + ':' + n = len(name) + list = [] + hit = 0 + for i in range(len(self.headers)): + line = self.headers[i] + if string.lower(line[:n]) == name: + hit = 1 + elif line[:1] not in string.whitespace: + hit = 0 + if hit: + list.append(i) + list.reverse() + for i in list: + del self.headers[i] + + def has_key(self, name): + """Determine whether a message contains the named header.""" + return self.dict.has_key(string.lower(name)) + + def keys(self): + """Get all of a message's header field names.""" + return self.dict.keys() + + def values(self): + """Get all of a message's header field values.""" + return self.dict.values() + + def items(self): + """Get all of a message's headers. + + Returns a list of name, value tuples. + """ + return self.dict.items() + + def __str__(self): + str = '' + for hdr in self.headers: + str = str + hdr + return str + + +# Utility functions +# ----------------- + +# XXX Should fix unquote() and quote() to be really conformant. +# XXX The inverses of the parse functions may also be useful. 
+ + +def unquote(str): + """Remove quotes from a string.""" + if len(str) > 1: + if str[0] == '"' and str[-1:] == '"': + return str[1:-1] + if str[0] == '<' and str[-1:] == '>': + return str[1:-1] + return str + + +def quote(str): + """Add quotes around a string.""" + return '"%s"' % string.join( + string.split( + string.join( + string.split(str, '\\'), + '\\\\'), + '"'), + '\\"') + + +def parseaddr(address): + """Parse an address into a (realname, mailaddr) tuple.""" + a = AddrlistClass(address) + list = a.getaddrlist() + if not list: + return (None, None) + else: + return list[0] + + +class AddrlistClass: + """Address parser class by Ben Escoto. + + To understand what this class does, it helps to have a copy of + RFC-822 in front of you. + + Note: this class interface is deprecated and may be removed in the future. + Use rfc822.AddressList instead. + """ + + def __init__(self, field): + """Initialize a new instance. + + `field' is an unparsed address header field, containing + one or more addresses. + """ + self.specials = '()<>@,:;.\"[]' + self.pos = 0 + self.LWS = ' \t' + self.CR = '\r\n' + self.atomends = self.specials + self.LWS + self.CR + self.field = field + self.commentlist = [] + + def gotonext(self): + """Parse up to the start of the next address.""" + while self.pos < len(self.field): + if self.field[self.pos] in self.LWS + '\n\r': + self.pos = self.pos + 1 + elif self.field[self.pos] == '(': + self.commentlist.append(self.getcomment()) + else: break + + def getaddrlist(self): + """Parse all addresses. + + Returns a list containing all of the addresses. + """ + ad = self.getaddress() + if ad: + return ad + self.getaddrlist() + else: return [] + + def getaddress(self): + """Parse the next address.""" + self.commentlist = [] + self.gotonext() + + oldpos = self.pos + oldcl = self.commentlist + plist = self.getphraselist() + + self.gotonext() + returnlist = [] + + if self.pos >= len(self.field): + # Bad email address technically, no domain. 
+ if plist: + returnlist = [(string.join(self.commentlist), plist[0])] + + elif self.field[self.pos] in '.@': + # email address is just an addrspec + # this isn't very efficient since we start over + self.pos = oldpos + self.commentlist = oldcl + addrspec = self.getaddrspec() + returnlist = [(string.join(self.commentlist), addrspec)] + + elif self.field[self.pos] == ':': + # address is a group + returnlist = [] + + fieldlen = len(self.field) + self.pos = self.pos + 1 + while self.pos < len(self.field): + self.gotonext() + if self.pos < fieldlen and self.field[self.pos] == ';': + self.pos = self.pos + 1 + break + returnlist = returnlist + self.getaddress() + + elif self.field[self.pos] == '<': + # Address is a phrase then a route addr + routeaddr = self.getrouteaddr() + + if self.commentlist: + returnlist = [(string.join(plist) + ' (' + \ + string.join(self.commentlist) + ')', routeaddr)] + else: returnlist = [(string.join(plist), routeaddr)] + + else: + if plist: + returnlist = [(string.join(self.commentlist), plist[0])] + elif self.field[self.pos] in self.specials: + self.pos = self.pos + 1 + + self.gotonext() + if self.pos < len(self.field) and self.field[self.pos] == ',': + self.pos = self.pos + 1 + return returnlist + + def getrouteaddr(self): + """Parse a route address (Return-path value). + + This method just skips all the route stuff and returns the addrspec. 
+ """ + if self.field[self.pos] != '<': + return + + expectroute = 0 + self.pos = self.pos + 1 + self.gotonext() + adlist = None + while self.pos < len(self.field): + if expectroute: + self.getdomain() + expectroute = 0 + elif self.field[self.pos] == '>': + self.pos = self.pos + 1 + break + elif self.field[self.pos] == '@': + self.pos = self.pos + 1 + expectroute = 1 + elif self.field[self.pos] == ':': + self.pos = self.pos + 1 + expectaddrspec = 1 + else: + adlist = self.getaddrspec() + self.pos = self.pos + 1 + break + self.gotonext() + + return adlist + + def getaddrspec(self): + """Parse an RFC-822 addr-spec.""" + aslist = [] + + self.gotonext() + while self.pos < len(self.field): + if self.field[self.pos] == '.': + aslist.append('.') + self.pos = self.pos + 1 + elif self.field[self.pos] == '"': + aslist.append('"%s"' % self.getquote()) + elif self.field[self.pos] in self.atomends: + break + else: aslist.append(self.getatom()) + self.gotonext() + + if self.pos >= len(self.field) or self.field[self.pos] != '@': + return string.join(aslist, '') + + aslist.append('@') + self.pos = self.pos + 1 + self.gotonext() + return string.join(aslist, '') + self.getdomain() + + def getdomain(self): + """Get the complete domain name from an address.""" + sdlist = [] + while self.pos < len(self.field): + if self.field[self.pos] in self.LWS: + self.pos = self.pos + 1 + elif self.field[self.pos] == '(': + self.commentlist.append(self.getcomment()) + elif self.field[self.pos] == '[': + sdlist.append(self.getdomainliteral()) + elif self.field[self.pos] == '.': + self.pos = self.pos + 1 + sdlist.append('.') + elif self.field[self.pos] in self.atomends: + break + else: sdlist.append(self.getatom()) + return string.join(sdlist, '') + + def getdelimited(self, beginchar, endchars, allowcomments = 1): + """Parse a header fragment delimited by special characters. + + `beginchar' is the start character for the fragment. 
+ If self is not looking at an instance of `beginchar' then + getdelimited returns the empty string. + + `endchars' is a sequence of allowable end-delimiting characters. + Parsing stops when one of these is encountered. + + If `allowcomments' is non-zero, embedded RFC-822 comments + are allowed within the parsed fragment. + """ + if self.field[self.pos] != beginchar: + return '' + + slist = [''] + quote = 0 + self.pos = self.pos + 1 + while self.pos < len(self.field): + if quote == 1: + slist.append(self.field[self.pos]) + quote = 0 + elif self.field[self.pos] in endchars: + self.pos = self.pos + 1 + break + elif allowcomments and self.field[self.pos] == '(': + slist.append(self.getcomment()) + elif self.field[self.pos] == '\\': + quote = 1 + else: + slist.append(self.field[self.pos]) + self.pos = self.pos + 1 + + return string.join(slist, '') + + def getquote(self): + """Get a quote-delimited fragment from self's field.""" + return self.getdelimited('"', '"\r', 0) + + def getcomment(self): + """Get a parenthesis-delimited fragment from self's field.""" + return self.getdelimited('(', ')\r', 1) + + def getdomainliteral(self): + """Parse an RFC-822 domain-literal.""" + return self.getdelimited('[', ']\r', 0) + + def getatom(self): + """Parse an RFC-822 atom.""" + atomlist = [''] + + while self.pos < len(self.field): + if self.field[self.pos] in self.atomends: + break + else: atomlist.append(self.field[self.pos]) + self.pos = self.pos + 1 + + return string.join(atomlist, '') + + def getphraselist(self): + """Parse a sequence of RFC-822 phrases. + + A phrase is a sequence of words, which are in turn either + RFC-822 atoms or quoted-strings. Phrases are canonicalized + by squeezing all runs of continuous whitespace into one space. 
+ """ + plist = [] + + while self.pos < len(self.field): + if self.field[self.pos] in self.LWS: + self.pos = self.pos + 1 + elif self.field[self.pos] == '"': + plist.append(self.getquote()) + elif self.field[self.pos] == '(': + self.commentlist.append(self.getcomment()) + elif self.field[self.pos] in self.atomends: + break + else: plist.append(self.getatom()) + + return plist + +class AddressList(AddrlistClass): + """An AddressList encapsulates a list of parsed RFC822 addresses.""" + def __init__(self, field): + AddrlistClass.__init__(self, field) + if field: + self.addresslist = self.getaddrlist() + else: + self.addresslist = [] + + def __len__(self): + return len(self.addresslist) + + def __str__(self): + return string.joinfields(map(dump_address_pair, self.addresslist),", ") + + def __add__(self, other): + # Set union + newaddr = AddressList(None) + newaddr.addresslist = self.addresslist[:] + for x in other.addresslist: + if not x in self.addresslist: + newaddr.addresslist.append(x) + return newaddr + + def __sub__(self, other): + # Set difference + newaddr = AddressList(None) + for x in self.addresslist: + if not x in other.addresslist: + newaddr.addresslist.append(x) + return newaddr + + def __getitem__(self, index): + # Make indexing, slices, and 'in' work + return self.addresslist[index] + +def dump_address_pair(pair): + """Dump a (name, address) pair in a canonicalized form.""" + if pair[0]: + return '"' + pair[0] + '" <' + pair[1] + '>' + else: + return pair[1] + +# Parse a date field + +_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', + 'aug', 'sep', 'oct', 'nov', 'dec', + 'january', 'february', 'march', 'april', 'may', 'june', 'july', + 'august', 'september', 'october', 'november', 'december'] +_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] + +# The timezone table does not include the military time zones defined +# in RFC822, other than Z. 
According to RFC1123, the description in +# RFC822 gets the signs wrong, so we can't rely on any such time +# zones. RFC1123 recommends that numeric timezone indicators be used +# instead of timezone names. + +_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0, + 'AST': -400, 'ADT': -300, # Atlantic (used in Canada) + 'EST': -500, 'EDT': -400, # Eastern + 'CST': -600, 'CDT': -500, # Central + 'MST': -700, 'MDT': -600, # Mountain + 'PST': -800, 'PDT': -700 # Pacific + } + + +def parsedate_tz(data): + """Convert a date string to a time tuple. + + Accounts for military timezones. + """ + data = string.split(data) + if data[0][-1] in (',', '.') or string.lower(data[0]) in _daynames: + # There's a dayname here. Skip it + del data[0] + if len(data) == 3: # RFC 850 date, deprecated + stuff = string.split(data[0], '-') + if len(stuff) == 3: + data = stuff + data[1:] + if len(data) == 4: + s = data[3] + i = string.find(s, '+') + if i > 0: + data[3:] = [s[:i], s[i+1:]] + else: + data.append('') # Dummy tz + if len(data) < 5: + return None + data = data[:5] + [dd, mm, yy, tm, tz] = data + mm = string.lower(mm) + if not mm in _monthnames: + dd, mm = mm, string.lower(dd) + if not mm in _monthnames: + return None + mm = _monthnames.index(mm)+1 + if mm > 12: mm = mm - 12 + if dd[-1] == ',': + dd = dd[:-1] + i = string.find(yy, ':') + if i > 0: + yy, tm = tm, yy + if yy[-1] == ',': + yy = yy[:-1] + if yy[0] not in string.digits: + yy, tz = tz, yy + if tm[-1] == ',': + tm = tm[:-1] + tm = string.splitfields(tm, ':') + if len(tm) == 2: + [thh, tmm] = tm + tss = '0' + elif len(tm) == 3: + [thh, tmm, tss] = tm + else: + return None + try: + yy = string.atoi(yy) + dd = string.atoi(dd) + thh = string.atoi(thh) + tmm = string.atoi(tmm) + tss = string.atoi(tss) + except string.atoi_error: + return None + tzoffset=None + tz=string.upper(tz) + if _timezones.has_key(tz): + tzoffset=_timezones[tz] + else: + try: + tzoffset=string.atoi(tz) + except string.atoi_error: + pass + # Convert a timezone 
offset into seconds ; -0500 -> -18000 + if tzoffset: + if tzoffset < 0: + tzsign = -1 + tzoffset = -tzoffset + else: + tzsign = 1 + tzoffset = tzsign * ( (tzoffset/100)*3600 + (tzoffset % 100)*60) + tuple = (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset) + return tuple + + +def parsedate(data): + """Convert a time string to a time tuple.""" + t=parsedate_tz(data) + if type(t)==type( () ): + return t[:9] + else: return t + + +def mktime_tz(data): + """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.""" + if data[9] is None: + # No zone info, so localtime is better assumption than GMT + return time.mktime(data[:8] + (-1,)) + else: + t = time.mktime(data[:8] + (0,)) + return t - data[9] - time.timezone + +def formatdate(timeval=None): + """Returns time format preferred for Internet standards. + + Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123 + """ + if timeval is None: + timeval = time.time() + return "%s" % time.strftime('%a, %d %b %Y %H:%M:%S GMT', + time.gmtime(timeval)) + + +# When used as script, run a small test program. +# The first command line argument must be a filename containing one +# message in RFC-822 format. 
+ +if __name__ == '__main__': + import sys, os + file = os.path.join(os.environ['HOME'], 'Mail/inbox/1') + if sys.argv[1:]: file = sys.argv[1] + f = open(file, 'r') + m = Message(f) + print 'From:', m.getaddr('from') + print 'To:', m.getaddrlist('to') + print 'Subject:', m.getheader('subject') + print 'Date:', m.getheader('date') + date = m.getdate_tz('date') + if date: + print 'ParsedDate:', time.asctime(date[:-1]), + hhmmss = date[-1] + hhmm, ss = divmod(hhmmss, 60) + hh, mm = divmod(hhmm, 60) + print "%+03d%02d" % (hh, mm), + if ss: print ".%02d" % ss, + print + else: + print 'ParsedDate:', None + m.rewindbody() + n = 0 + while f.readline(): + n = n + 1 + print 'Lines:', n + print '-'*70 + print 'len =', len(m) + if m.has_key('Date'): print 'Date =', m['Date'] + if m.has_key('X-Nonsense'): pass + print 'keys =', m.keys() + print 'values =', m.values() + print 'items =', m.items() diff --git a/Lib/dos-8x3/rlcomple.py b/Lib/dos-8x3/rlcomple.py index 92633ab..aa1dd02 100644 --- a/Lib/dos-8x3/rlcomple.py +++ b/Lib/dos-8x3/rlcomple.py @@ -1,7 +1,6 @@ """Word completion for GNU readline 2.0. This requires the latest extension to the readline module (the -set_completer() function). When completing a simple identifier, it completes keywords, built-ins and globals in __main__; when completing NAME.NAME..., it evaluates (!) the expression up to the last dot and completes its attributes. @@ -87,7 +86,8 @@ class Completer: Assuming the text is of the form NAME.NAME....[NAME], and is evaluabable in the globals of __main__, it will be evaluated and its attributes (as revealed by dir()) are used as possible - completions. + completions. (For class instances, class members are are also + considered.) WARNING: this can still invoke arbitrary C code, if an object with a __getattr__ hook is evaluated. 
@@ -98,7 +98,11 @@ class Completer: if not m: return expr, attr = m.group(1, 3) - words = dir(eval(expr, __main__.__dict__)) + object = eval(expr, __main__.__dict__) + words = dir(object) + if hasattr(object,'__class__'): + words.append('__class__') + words = words + get_class_members(object.__class__) matches = [] n = len(attr) for word in words: @@ -106,4 +110,11 @@ class Completer: matches.append("%s.%s" % (expr, word)) return matches +def get_class_members(klass): + ret = dir(klass) + if hasattr(klass,'__bases__'): + for base in klass.__bases__: + ret = ret + get_class_members(base) + return ret + readline.set_completer(Completer().complete) diff --git a/Lib/dos-8x3/robotpar.py b/Lib/dos-8x3/robotpar.py new file mode 100644 index 0000000..3f4396b --- /dev/null +++ b/Lib/dos-8x3/robotpar.py @@ -0,0 +1,97 @@ +""" + +Robots.txt file parser class. Accepts a list of lines or robots.txt URL as +input, builds a set of rules from that list, then answers questions about +fetchability of other URLs. 
+ +""" + +class RobotFileParser: + + def __init__(self): + self.rules = {} + self.debug = 0 + self.url = '' + self.last_checked = 0 + + def mtime(self): + return self.last_checked + + def modified(self): + import time + self.last_checked = time.time() + + def set_url(self, url): + self.url = url + + def read(self): + import urllib + self.parse(urllib.urlopen(self.url).readlines()) + + def parse(self, lines): + """parse the input lines from a robot.txt file""" + import string, re + active = [] + for line in lines: + if self.debug: print '>', line, + # blank line terminates current record + if not line[:-1]: + active = [] + continue + # remove optional comment and strip line + line = string.strip(line[:string.find(line, '#')]) + if not line: + continue + line = re.split(' *: *', line) + if len(line) == 2: + line[0] = string.lower(line[0]) + if line[0] == 'user-agent': + # this record applies to this user agent + if self.debug: print '>> user-agent:', line[1] + active.append(line[1]) + if not self.rules.has_key(line[1]): + self.rules[line[1]] = [] + elif line[0] == 'disallow': + if line[1]: + if self.debug: print '>> disallow:', line[1] + for agent in active: + self.rules[agent].append(re.compile(line[1])) + else: + pass + for agent in active: + if self.debug: print '>> allow', agent + self.rules[agent] = [] + else: + if self.debug: print '>> unknown:', line + + self.modified() + + # returns true if agent is allowed to fetch url + def can_fetch(self, useragent, url): + """using the parsed robots.txt decide if useragent can fetch url""" + import urlparse + ag = useragent + if not self.rules.has_key(ag): ag = '*' + if not self.rules.has_key(ag): + if self.debug: print '>> allowing', url, 'fetch by', useragent + return 1 + path = urlparse.urlparse(url)[2] + for rule in self.rules[ag]: + if rule.match(path) is not None: + if self.debug: print '>> disallowing', url, 'fetch by', useragent + return 0 + if self.debug: print '>> allowing', url, 'fetch by', useragent + return 1 
+ +def _test(): + rp = RobotFileParser() + rp.debug = 1 + rp.set_url('http://www.musi-cal.com/robots.txt') + rp.read() + print rp.rules + print rp.can_fetch('*', 'http://www.musi-cal.com.com/') + print rp.can_fetch('Musi-Cal-Robot', + 'http://www.musi-cal.com/cgi-bin/event-search?city=San+Francisco') + +if __name__ == "__main__": + _test() diff --git a/Lib/dos-8x3/simpleht.py b/Lib/dos-8x3/simpleht.py index 9260e7e..a4517dc 100755 --- a/Lib/dos-8x3/simpleht.py +++ b/Lib/dos-8x3/simpleht.py @@ -10,13 +10,10 @@ __version__ = "0.3" import os -import sys -import time -import socket import string import posixpath -import SocketServer import BaseHTTPServer +import urllib class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler): @@ -81,7 +78,7 @@ class SimpleHTTPRequestHandler(BaseHTTPServer.BaseHTTPRequestHandler): probably be diagnosed.) """ - path = posixpath.normpath(path) + path = posixpath.normpath(urllib.unquote(path)) words = string.splitfields(path, '/') words = filter(None, words) path = os.getcwd() diff --git a/Lib/dos-8x3/socketse.py b/Lib/dos-8x3/socketse.py index 23f3a8e..2c73356 100755 --- a/Lib/dos-8x3/socketse.py +++ b/Lib/dos-8x3/socketse.py @@ -207,7 +207,10 @@ class TCPServer: def handle_request(self): """Handle one request, possibly blocking.""" - request, client_address = self.get_request() + try: + request, client_address = self.get_request() + except socket.error: + return if self.verify_request(request, client_address): try: self.process_request(request, client_address) @@ -278,11 +281,21 @@ class ForkingMixIn: """Mix-in class to handle each request in a new process.""" active_children = None + max_children = 40 def collect_children(self): """Internal routine to wait for died children.""" while self.active_children: - pid, status = os.waitpid(0, os.WNOHANG) + if len(self.active_children) < self.max_children: + options = os.WNOHANG + else: + # If the maximum number of children are already + # running, block while waiting for a child to 
exit + options = 0 + try: + pid, status = os.waitpid(0, options) + except os.error: + pid = None if not pid: break self.active_children.remove(pid) @@ -300,6 +313,7 @@ class ForkingMixIn: # Child process. # This must never return, hence os._exit()! try: + self.socket.close() self.finish_request(request, client_address) os._exit(0) except: @@ -311,14 +325,14 @@ class ForkingMixIn: class ThreadingMixIn: - """Mix-in class to handle each request in a new thread.""" def process_request(self, request, client_address): """Start a new thread to process the request.""" - import thread - thread.start_new_thread(self.finish_request, - (request, client_address)) + import threading + t = threading.Thread(target = self.finish_request, + args = (request, client_address)) + t.start() class ForkingUDPServer(ForkingMixIn, UDPServer): pass diff --git a/Lib/dos-8x3/sre_comp.py b/Lib/dos-8x3/sre_comp.py new file mode 100644 index 0000000..8738061 --- /dev/null +++ b/Lib/dos-8x3/sre_comp.py @@ -0,0 +1,187 @@ +# +# Secret Labs' Regular Expression Engine +# $Id$ +# +# convert template to internal format +# +# Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved. +# +# This code can only be used for 1.6 alpha testing. All other use +# require explicit permission from Secret Labs AB. +# +# Portions of this engine have been developed in cooperation with +# CNRI. Hewlett-Packard provided funding for 1.6 integration and +# other compatibility work. +# + +# FIXME: <fl> formalize (objectify?) and document the compiler code +# format, so that other frontends can use this compiler + +import array, string, sys + +import _sre + +from sre_constants import * + +# find an array type code that matches the engine's code size +for WORDSIZE in "BHil": + if len(array.array(WORDSIZE, [0]).tostring()) == _sre.getcodesize(): + break +else: + raise RuntimeError, "cannot find a useable array type" + +# FIXME: <fl> should move some optimizations from the parser to here! 
+ +class Code: + def __init__(self): + self.data = [] + def __len__(self): + return len(self.data) + def __getitem__(self, index): + return self.data[index] + def __setitem__(self, index, code): + self.data[index] = code + def append(self, code): + self.data.append(code) + def todata(self): + # print self.data + return array.array(WORDSIZE, self.data).tostring() + +def _lower(literal): + # return _sre._lower(literal) # FIXME + return string.lower(literal) + +def _compile(code, pattern, flags): + append = code.append + for op, av in pattern: + if op is ANY: + if "s" in flags: + append(CODES[op]) # any character at all! + else: + append(CODES[NOT_LITERAL]) + append(10) + elif op in (SUCCESS, FAILURE): + append(CODES[op]) + elif op is AT: + append(CODES[op]) + append(POSITIONS[av]) + elif op is BRANCH: + append(CODES[op]) + tail = [] + for av in av[1]: + skip = len(code); append(0) + _compile(code, av, flags) + append(CODES[JUMP]) + tail.append(len(code)); append(0) + code[skip] = len(code) - skip + append(0) # end of branch + for tail in tail: + code[tail] = len(code) - tail + elif op is CALL: + append(CODES[op]) + skip = len(code); append(0) + _compile(code, av, flags) + append(CODES[SUCCESS]) + code[skip] = len(code) - skip + elif op is CATEGORY: # not used by current parser + append(CODES[op]) + append(CATEGORIES[av]) + elif op is GROUP: + if "i" in flags: + append(CODES[MAP_IGNORE[op]]) + else: + append(CODES[op]) + append(av) + elif op is IN: + if "i" in flags: + append(CODES[MAP_IGNORE[op]]) + def fixup(literal): + return ord(_lower(literal)) + else: + append(CODES[op]) + fixup = ord + skip = len(code); append(0) + for op, av in av: + append(CODES[op]) + if op is NEGATE: + pass + elif op is LITERAL: + append(fixup(av)) + elif op is RANGE: + append(fixup(av[0])) + append(fixup(av[1])) + elif op is CATEGORY: + append(CATEGORIES[av]) + else: + raise ValueError, "unsupported set operator" + append(CODES[FAILURE]) + code[skip] = len(code) - skip + elif op in 
(LITERAL, NOT_LITERAL): + if "i" in flags: + append(CODES[MAP_IGNORE[op]]) + append(ord(_lower(av))) + else: + append(CODES[op]) + append(ord(av)) + elif op is MARK: + append(CODES[op]) + append(av) + elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT): + lo, hi = av[2].getwidth() + if lo == 0: + raise SyntaxError, "cannot repeat zero-width items" + if lo == hi == 1 and op is MAX_REPEAT: + append(CODES[MAX_REPEAT_ONE]) + skip = len(code); append(0) + append(av[0]) + append(av[1]) + _compile(code, av[2], flags) + append(CODES[SUCCESS]) + code[skip] = len(code) - skip + else: + append(CODES[op]) + skip = len(code); append(0) + append(av[0]) + append(av[1]) + _compile(code, av[2], flags) + if op is MIN_REPEAT: + append(CODES[MIN_UNTIL]) + else: + # FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?) + append(CODES[MAX_UNTIL]) + code[skip] = len(code) - skip + elif op is SUBPATTERN: +## group = av[0] +## if group: +## append(CODES[MARK]) +## append((group-1)*2) + _compile(code, av[1], flags) +## if group: +## append(CODES[MARK]) +## append((group-1)*2+1) + else: + raise ValueError, ("unsupported operand type", op) + +def compile(p, flags=()): + # convert pattern list to internal format + if type(p) in (type(""), type(u"")): + import sre_parse + pattern = p + p = sre_parse.parse(p) + else: + pattern = None + # print p.getwidth() + # print p + code = Code() + _compile(code, p.data, p.pattern.flags) + code.append(CODES[SUCCESS]) + # print list(code.data) + data = code.todata() + if 0: # debugging + print + print "-" * 68 + import sre_disasm + sre_disasm.disasm(data) + print "-" * 68 + # print len(data), p.pattern.groups, len(p.pattern.groupdict) + return _sre.compile(pattern, data, p.pattern.groups-1, p.pattern.groupdict) diff --git a/Lib/dos-8x3/sre_cons.py b/Lib/dos-8x3/sre_cons.py new file mode 100644 index 0000000..af88309 --- /dev/null +++ b/Lib/dos-8x3/sre_cons.py @@ -0,0 +1,131 @@ +# +# Secret Labs' Regular Expression Engine +# $Id$ +# +# various symbols used by the regular 
expression engine. +# run this script to update the _sre include files! +# +# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved. +# +# This code can only be used for 1.6 alpha testing. All other use +# require explicit permission from Secret Labs AB. +# +# Portions of this engine have been developed in cooperation with +# CNRI. Hewlett-Packard provided funding for 1.6 integration and +# other compatibility work. +# + +# operators + +FAILURE = "failure" +SUCCESS = "success" + +ANY = "any" +ASSERT = "assert" +AT = "at" +BRANCH = "branch" +CALL = "call" +CATEGORY = "category" +GROUP = "group" +GROUP_IGNORE = "group_ignore" +IN = "in" +IN_IGNORE = "in_ignore" +JUMP = "jump" +LITERAL = "literal" +LITERAL_IGNORE = "literal_ignore" +MARK = "mark" +MAX_REPEAT = "max_repeat" +MAX_REPEAT_ONE = "max_repeat_one" +MAX_UNTIL = "max_until" +MIN_REPEAT = "min_repeat" +MIN_UNTIL = "min_until" +NEGATE = "negate" +NOT_LITERAL = "not_literal" +NOT_LITERAL_IGNORE = "not_literal_ignore" +RANGE = "range" +REPEAT = "repeat" +SUBPATTERN = "subpattern" + +# positions +AT_BEGINNING = "at_beginning" +AT_BOUNDARY = "at_boundary" +AT_NON_BOUNDARY = "at_non_boundary" +AT_END = "at_end" + +# categories + +CATEGORY_DIGIT = "category_digit" +CATEGORY_NOT_DIGIT = "category_not_digit" +CATEGORY_SPACE = "category_space" +CATEGORY_NOT_SPACE = "category_not_space" +CATEGORY_WORD = "category_word" +CATEGORY_NOT_WORD = "category_not_word" + +CODES = [ + + # failure=0 success=1 (just because it looks better that way :-) + FAILURE, SUCCESS, + + ANY, + ASSERT, + AT, + BRANCH, + CALL, + CATEGORY, + GROUP, GROUP_IGNORE, + IN, IN_IGNORE, + JUMP, + LITERAL, LITERAL_IGNORE, + MARK, + MAX_REPEAT, MAX_UNTIL, + MAX_REPEAT_ONE, + MIN_REPEAT, MIN_UNTIL, + NOT_LITERAL, NOT_LITERAL_IGNORE, + NEGATE, + RANGE, + REPEAT + +] + +# convert to dictionary +c = {} +i = 0 +for code in CODES: + c[code] = i + i = i + 1 +CODES = c + +# replacement operations for "ignore case" mode +MAP_IGNORE = { + GROUP: GROUP_IGNORE, 
+ IN: IN_IGNORE, + LITERAL: LITERAL_IGNORE, + NOT_LITERAL: NOT_LITERAL_IGNORE +} + +POSITIONS = { + AT_BEGINNING: ord("a"), + AT_BOUNDARY: ord("b"), + AT_NON_BOUNDARY: ord("B"), + AT_END: ord("z"), +} + +CATEGORIES = { + CATEGORY_DIGIT: ord("d"), + CATEGORY_NOT_DIGIT: ord("D"), + CATEGORY_SPACE: ord("s"), + CATEGORY_NOT_SPACE: ord("S"), + CATEGORY_WORD: ord("w"), + CATEGORY_NOT_WORD: ord("W"), +} + +if __name__ == "__main__": + import string + items = CODES.items() + items.sort(lambda a, b: cmp(a[1], b[1])) + f = open("sre_constants.h", "w") + f.write("/* generated by sre_constants.py */\n") + for k, v in items: + f.write("#define SRE_OP_" + string.upper(k) + " " + str(v) + "\n") + f.close() + print "done" diff --git a/Lib/dos-8x3/sre_pars.py b/Lib/dos-8x3/sre_pars.py new file mode 100644 index 0000000..8b68ea1 --- /dev/null +++ b/Lib/dos-8x3/sre_pars.py @@ -0,0 +1,497 @@ +# +# Secret Labs' Regular Expression Engine +# $Id$ +# +# convert re-style regular expression to SRE template. the current +# implementation is somewhat incomplete, and not very fast. should +# definitely be rewritten before Python 1.6 goes beta. +# +# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved. +# +# This code can only be used for 1.6 alpha testing. All other use +# require explicit permission from Secret Labs AB. +# +# Portions of this engine have been developed in cooperation with +# CNRI. Hewlett-Packard provided funding for 1.6 integration and +# other compatibility work. +# + +# FIXME: comments marked with the FIXME tag are open issues. all such +# issues should be closed before the final beta. 
+ +import string, sys + +from sre_constants import * + +SPECIAL_CHARS = ".\\[{()*+?^$|" +REPEAT_CHARS = "*+?{" + +# FIXME: string in tuple tests may explode with if char is unicode :-( +DIGITS = tuple(string.digits) + +OCTDIGITS = tuple("01234567") +HEXDIGITS = tuple("0123456789abcdefABCDEF") + +ESCAPES = { + "\\a": (LITERAL, chr(7)), + "\\b": (LITERAL, chr(8)), + "\\f": (LITERAL, chr(12)), + "\\n": (LITERAL, chr(10)), + "\\r": (LITERAL, chr(13)), + "\\t": (LITERAL, chr(9)), + "\\v": (LITERAL, chr(11)) +} + +CATEGORIES = { + "\\A": (AT, AT_BEGINNING), # start of string + "\\b": (AT, AT_BOUNDARY), + "\\B": (AT, AT_NON_BOUNDARY), + "\\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]), + "\\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]), + "\\s": (IN, [(CATEGORY, CATEGORY_SPACE)]), + "\\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]), + "\\w": (IN, [(CATEGORY, CATEGORY_WORD)]), + "\\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]), + "\\Z": (AT, AT_END), # end of string +} + +class Pattern: + # FIXME: <fl> rename class, and store flags in here too! 
+ def __init__(self): + self.flags = [] + self.groups = 1 + self.groupdict = {} + def getgroup(self, name=None): + gid = self.groups + self.groups = gid + 1 + if name: + self.groupdict[name] = gid + return gid + def setflag(self, flag): + if flag in self.flags: + self.flags.append(flag) + +class SubPattern: + # a subpattern, in intermediate form + def __init__(self, pattern, data=None): + self.pattern = pattern + if not data: + data = [] + self.data = data + self.flags = [] + self.width = None + def __repr__(self): + return repr(self.data) + def __len__(self): + return len(self.data) + def __delitem__(self, index): + del self.data[index] + def __getitem__(self, index): + return self.data[index] + def __setitem__(self, index, code): + self.data[index] = code + def __getslice__(self, start, stop): + return SubPattern(self.pattern, self.data[start:stop]) + def insert(self, index, code): + self.data.insert(index, code) + def append(self, code): + self.data.append(code) + def getwidth(self): + # determine the width (min, max) for this subpattern + if self.width: + return self.width + lo = hi = 0L + for op, av in self.data: + if op is BRANCH: + l = sys.maxint + h = 0 + for av in av[1]: + i, j = av.getwidth() + l = min(l, i) + h = min(h, j) + lo = lo + i + hi = hi + j + elif op is CALL: + i, j = av.getwidth() + lo = lo + i + hi = hi + j + elif op is SUBPATTERN: + i, j = av[1].getwidth() + lo = lo + i + hi = hi + j + elif op in (MIN_REPEAT, MAX_REPEAT): + i, j = av[2].getwidth() + lo = lo + i * av[0] + hi = hi + j * av[1] + elif op in (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY): + lo = lo + 1 + hi = hi + 1 + elif op == SUCCESS: + break + self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint)) + return self.width + def set(self, flag): + if not flag in self.flags: + self.flags.append(flag) + def reset(self, flag): + if flag in self.flags: + self.flags.remove(flag) + +class Tokenizer: + def __init__(self, string): + self.string = list(string) + self.next = 
self.__next() + def __next(self): + if not self.string: + return None + char = self.string[0] + if char[0] == "\\": + try: + c = self.string[1] + except IndexError: + raise SyntaxError, "bogus escape" + char = char + c + try: + if c == "x": + # hexadecimal constant + for i in xrange(2, sys.maxint): + c = self.string[i] + if str(c) not in HEXDIGITS: + break + char = char + c + elif str(c) in DIGITS: + # decimal (or octal) number + for i in xrange(2, sys.maxint): + c = self.string[i] + # FIXME: if larger than current number of + # groups, interpret as an octal number + if str(c) not in DIGITS: + break + char = char + c + except IndexError: + pass # use what we've got this far + del self.string[0:len(char)] + return char + def match(self, char): + if char == self.next: + self.next = self.__next() + return 1 + return 0 + def match_set(self, set): + if self.next and self.next in set: + self.next = self.__next() + return 1 + return 0 + def get(self): + this = self.next + self.next = self.__next() + return this + +def _fixescape(escape, character_class=0): + # convert escape to (type, value) + if character_class: + # inside a character class, we'll look in the character + # escapes dictionary first + code = ESCAPES.get(escape) + if code: + return code + code = CATEGORIES.get(escape) + else: + code = CATEGORIES.get(escape) + if code: + return code + code = ESCAPES.get(escape) + if code: + return code + if not character_class: + try: + group = int(escape[1:]) + # FIXME: only valid if group <= current number of groups + return GROUP, group + except ValueError: + pass + try: + if escape[1:2] == "x": + escape = escape[2:] + return LITERAL, chr(int(escape[-2:], 16) & 0xff) + elif str(escape[1:2]) in DIGITS: + return LITERAL, chr(int(escape[1:], 8) & 0xff) + elif len(escape) == 2: + return LITERAL, escape[1] + except ValueError: + pass + raise SyntaxError, "bogus escape: %s" % repr(escape) + +def _branch(subpattern, items): + + # form a branch operator from a set of items 
(FIXME: move this + # optimization to the compiler module!) + + # check if all items share a common prefix + while 1: + prefix = None + for item in items: + if not item: + break + if prefix is None: + prefix = item[0] + elif item[0] != prefix: + break + else: + # all subitems start with a common "prefix". + # move it out of the branch + for item in items: + del item[0] + subpattern.append(prefix) + continue # check next one + break + + # check if the branch can be replaced by a character set + for item in items: + if len(item) != 1 or item[0][0] != LITERAL: + break + else: + # we can store this as a character set instead of a + # branch (FIXME: use a range if possible) + set = [] + for item in items: + set.append(item[0]) + subpattern.append((IN, set)) + return + + subpattern.append((BRANCH, (None, items))) + +def _parse(source, pattern, flags=()): + + # parse regular expression pattern into an operator list. + + subpattern = SubPattern(pattern) + + this = None + + while 1: + + if str(source.next) in ("|", ")"): + break # end of subpattern + this = source.get() + if this is None: + break # end of pattern + + if this and this[0] not in SPECIAL_CHARS: + subpattern.append((LITERAL, this)) + + elif this == "[": + # character set + set = [] +## if source.match(":"): +## pass # handle character classes + if source.match("^"): + set.append((NEGATE, None)) + # check remaining characters + start = set[:] + while 1: + this = source.get() + if this == "]" and set != start: + break + elif this and this[0] == "\\": + code1 = _fixescape(this, 1) + elif this: + code1 = LITERAL, this + else: + raise SyntaxError, "unexpected end of regular expression" + if source.match("-"): + # potential range + this = source.get() + if this == "]": + set.append(code1) + set.append((LITERAL, "-")) + break + else: + if this[0] == "\\": + code2 = _fixescape(this, 1) + else: + code2 = LITERAL, this + if code1[0] != LITERAL or code2[0] != LITERAL: + raise SyntaxError, "illegal range" + if 
len(code1[1]) != 1 or len(code2[1]) != 1: + raise SyntaxError, "illegal range" + set.append((RANGE, (code1[1], code2[1]))) + else: + if code1[0] is IN: + code1 = code1[1][0] + set.append(code1) + + # FIXME: <fl> move set optimization to support function + if len(set)==1 and set[0][0] is LITERAL: + subpattern.append(set[0]) # optimization + elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL: + subpattern.append((NOT_LITERAL, set[1][1])) # optimization + else: + # FIXME: <fl> add charmap optimization + subpattern.append((IN, set)) + + elif this and this[0] in REPEAT_CHARS: + # repeat previous item + if this == "?": + min, max = 0, 1 + elif this == "*": + min, max = 0, sys.maxint + elif this == "+": + min, max = 1, sys.maxint + elif this == "{": + min, max = 0, sys.maxint + lo = hi = "" + while str(source.next) in DIGITS: + lo = lo + source.get() + if source.match(","): + while str(source.next) in DIGITS: + hi = hi + source.get() + else: + hi = lo + if not source.match("}"): + raise SyntaxError, "bogus range" + if lo: + min = int(lo) + if hi: + max = int(hi) + # FIXME: <fl> check that hi >= lo! + else: + raise SyntaxError, "not supported" + # figure out which item to repeat + # FIXME: should back up to the right mark, right? 
+ if subpattern: + index = len(subpattern)-1 + while subpattern[index][0] is MARK: + index = index - 1 + item = subpattern[index:index+1] + else: + raise SyntaxError, "nothing to repeat" + if source.match("?"): + subpattern[index] = (MIN_REPEAT, (min, max, item)) + else: + subpattern[index] = (MAX_REPEAT, (min, max, item)) + elif this == ".": + subpattern.append((ANY, None)) + elif this == "(": + group = 1 + name = None + if source.match("?"): + group = 0 + # options + if source.match("P"): + # named group: skip forward to end of name + if source.match("<"): + name = "" + while 1: + char = source.get() + if char is None or char == ">": + break + name = name + char + group = 1 + elif source.match(":"): + # non-capturing group + group = 2 + elif source.match_set("iI"): + pattern.setflag("i") + elif source.match_set("lL"): + pattern.setflag("l") + elif source.match_set("mM"): + pattern.setflag("m") + elif source.match_set("sS"): + pattern.setflag("s") + elif source.match_set("xX"): + pattern.setflag("x") + if group: + # parse group contents + b = [] + if group == 2: + # anonymous group + group = None + else: + group = pattern.getgroup(name) + if group: + subpattern.append((MARK, (group-1)*2)) + while 1: + p = _parse(source, pattern, flags) + if source.match(")"): + if b: + b.append(p) + _branch(subpattern, b) + else: + subpattern.append((SUBPATTERN, (group, p))) + break + elif source.match("|"): + b.append(p) + else: + raise SyntaxError, "group not properly closed" + if group: + subpattern.append((MARK, (group-1)*2+1)) + else: + # FIXME: should this really be a while loop? 
+ while 1: + char = source.get() + if char is None or char == ")": + break + + elif this == "^": + subpattern.append((AT, AT_BEGINNING)) + + elif this == "$": + subpattern.append((AT, AT_END)) + + elif this and this[0] == "\\": + code =_fixescape(this) + subpattern.append(code) + + else: + raise SyntaxError, "parser error" + + return subpattern + +def parse(source, flags=()): + s = Tokenizer(source) + g = Pattern() + b = [] + while 1: + p = _parse(s, g, flags) + tail = s.get() + if tail == "|": + b.append(p) + elif tail == ")": + raise SyntaxError, "unbalanced parenthesis" + elif tail is None: + if b: + b.append(p) + p = SubPattern(g) + _branch(p, b) + break + else: + raise SyntaxError, "bogus characters at end of regular expression" + return p + +if __name__ == "__main__": + from pprint import pprint + from testpatterns import PATTERNS + a = b = c = 0 + for pattern, flags in PATTERNS: + if flags: + continue + print "-"*68 + try: + p = parse(pattern) + print repr(pattern), "->" + pprint(p.data) + import sre_compile + try: + code = sre_compile.compile(p) + c = c + 1 + except: + pass + a = a + 1 + except SyntaxError, v: + print "**", repr(pattern), v + b = b + 1 + print "-"*68 + print a, "of", b, "patterns successfully parsed" + print c, "of", b, "patterns successfully compiled" + diff --git a/Lib/dos-8x3/statcach.py b/Lib/dos-8x3/statcach.py index 770aef0..b5147c2 100755 --- a/Lib/dos-8x3/statcach.py +++ b/Lib/dos-8x3/statcach.py @@ -1,7 +1,7 @@ -# Module 'statcache' -# -# Maintain a cache of file stats. -# There are functions to reset the cache or to selectively remove items. +"""Maintain a cache of stat() information on files. + +There are functions to reset the cache or to selectively remove items. +""" import os from stat import * @@ -12,42 +12,37 @@ from stat import * cache = {} -# Stat a file, possibly out of the cache. 
-# def stat(path): + """Stat a file, possibly out of the cache.""" if cache.has_key(path): return cache[path] cache[path] = ret = os.stat(path) return ret -# Reset the cache completely. -# def reset(): + """Reset the cache completely.""" global cache cache = {} -# Remove a given item from the cache, if it exists. -# def forget(path): + """Remove a given item from the cache, if it exists.""" if cache.has_key(path): del cache[path] -# Remove all pathnames with a given prefix. -# def forget_prefix(prefix): + """Remove all pathnames with a given prefix.""" n = len(prefix) for path in cache.keys(): if path[:n] == prefix: del cache[path] -# Forget about a directory and all entries in it, but not about -# entries in subdirectories. -# def forget_dir(prefix): + """Forget about a directory and all entries in it, but not about + entries in subdirectories.""" if prefix[-1:] == '/' and prefix <> '/': prefix = prefix[:-1] forget(prefix) @@ -62,19 +57,17 @@ def forget_dir(prefix): del cache[path] -# Remove all pathnames except with a given prefix. -# Normally used with prefix = '/' after a chdir(). -# def forget_except_prefix(prefix): + """Remove all pathnames except with a given prefix. + Normally used with prefix = '/' after a chdir().""" n = len(prefix) for path in cache.keys(): if path[:n] <> prefix: del cache[path] -# Check for directory. -# def isdir(path): + """Check for directory.""" try: st = stat(path) except os.error: diff --git a/Lib/dos-8x3/stringio.py b/Lib/dos-8x3/stringio.py index fc195b9..8efd7d8 100755 --- a/Lib/dos-8x3/stringio.py +++ b/Lib/dos-8x3/stringio.py @@ -1,30 +1,32 @@ -# class StringIO implements file-like objects that read/write a -# string buffer (a.k.a. "memory files"). -# -# This implements (nearly) all stdio methods. 
-# -# f = StringIO() # ready for writing -# f = StringIO(buf) # ready for reading -# f.close() # explicitly release resources held -# flag = f.isatty() # always false -# pos = f.tell() # get current position -# f.seek(pos) # set current position -# f.seek(pos, mode) # mode 0: absolute; 1: relative; 2: relative to EOF -# buf = f.read() # read until EOF -# buf = f.read(n) # read up to n bytes -# buf = f.readline() # read until end of line ('\n') or EOF -# list = f.readlines()# list of f.readline() results until EOF -# f.write(buf) # write at current position -# f.writelines(list) # for line in list: f.write(line) -# f.getvalue() # return whole file's contents as a string -# -# Notes: -# - Using a real file is often faster (but less convenient). -# - fileno() is left unimplemented so that code which uses it triggers -# an exception early. -# - Seeking far beyond EOF and then writing will insert real null -# bytes that occupy space in the buffer. -# - There's a simple test set (see end of this file). +"""File-like objects that read from or write to a string buffer. + +This implements (nearly) all stdio methods. + +f = StringIO() # ready for writing +f = StringIO(buf) # ready for reading +f.close() # explicitly release resources held +flag = f.isatty() # always false +pos = f.tell() # get current position +f.seek(pos) # set current position +f.seek(pos, mode) # mode 0: absolute; 1: relative; 2: relative to EOF +buf = f.read() # read until EOF +buf = f.read(n) # read up to n bytes +buf = f.readline() # read until end of line ('\n') or EOF +list = f.readlines()# list of f.readline() results until EOF +f.write(buf) # write at current position +f.writelines(list) # for line in list: f.write(line) +f.getvalue() # return whole file's contents as a string + +Notes: +- Using a real file is often faster (but less convenient). +- There's also a much faster implementation in C, called cStringIO, but + it's not subclassable. 
+- fileno() is left unimplemented so that code which uses it triggers + an exception early. +- Seeking far beyond EOF and then writing will insert real null + bytes that occupy space in the buffer. +- There's a simple test set (see end of this file). +""" import string diff --git a/Lib/dos-8x3/stringol.py b/Lib/dos-8x3/stringol.py new file mode 100644 index 0000000..8fa7dd4 --- /dev/null +++ b/Lib/dos-8x3/stringol.py @@ -0,0 +1,431 @@ +# module 'string' -- A collection of string operations + +# Warning: most of the code you see here isn't normally used nowadays. With +# Python 1.6, many of these functions are implemented as methods on the +# standard string object. They used to be implemented by a built-in module +# called strop, but strop is now obsolete itself. + +"""Common string manipulations. + +Public module variables: + +whitespace -- a string containing all characters considered whitespace +lowercase -- a string containing all characters considered lowercase letters +uppercase -- a string containing all characters considered uppercase letters +letters -- a string containing all characters considered letters +digits -- a string containing all characters considered decimal digits +hexdigits -- a string containing all characters considered hexadecimal digits +octdigits -- a string containing all characters considered octal digits + +""" + +# Some strings for ctype-style character classification +whitespace = ' \t\n\r\v\f' +lowercase = 'abcdefghijklmnopqrstuvwxyz' +uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' +letters = lowercase + uppercase +digits = '0123456789' +hexdigits = digits + 'abcdef' + 'ABCDEF' +octdigits = '01234567' + +# Case conversion helpers +_idmap = '' +for i in range(256): _idmap = _idmap + chr(i) +del i + +# Backward compatible names for exceptions +index_error = ValueError +atoi_error = ValueError +atof_error = ValueError +atol_error = ValueError + +# convert UPPER CASE letters to lower case +def lower(s): + """lower(s) -> string + + Return a 
copy of the string s converted to lowercase. + + """ + return s.lower() + +# Convert lower case letters to UPPER CASE +def upper(s): + """upper(s) -> string + + Return a copy of the string s converted to uppercase. + + """ + return s.upper() + +# Swap lower case letters and UPPER CASE +def swapcase(s): + """swapcase(s) -> string + + Return a copy of the string s with upper case characters + converted to lowercase and vice versa. + + """ + return s.swapcase() + +# Strip leading and trailing tabs and spaces +def strip(s): + """strip(s) -> string + + Return a copy of the string s with leading and trailing + whitespace removed. + + """ + return s.strip() + +# Strip leading tabs and spaces +def lstrip(s): + """lstrip(s) -> string + + Return a copy of the string s with leading whitespace removed. + + """ + return s.lstrip() + +# Strip trailing tabs and spaces +def rstrip(s): + """rstrip(s) -> string + + Return a copy of the string s with trailing whitespace + removed. + + """ + return s.rstrip() + + +# Split a string into a list of space/tab-separated words +# NB: split(s) is NOT the same as splitfields(s, ' ')! +def split(s, sep=None, maxsplit=0): + """split(str [,sep [,maxsplit]]) -> list of strings + + Return a list of the words in the string s, using sep as the + delimiter string. If maxsplit is nonzero, splits into at most + maxsplit words If sep is not specified, any whitespace string + is a separator. Maxsplit defaults to 0. + + (split and splitfields are synonymous) + + """ + return s.split(sep, maxsplit) +splitfields = split + +# Join fields with optional separator +def join(words, sep = ' '): + """join(list [,sep]) -> string + + Return a string composed of the words in list, with + intervening occurences of sep. The default separator is a + single space. 
+ + (joinfields and join are synonymous) + + """ + return sep.join(words) +joinfields = join + +# for a little bit of speed +_apply = apply + +# Find substring, raise exception if not found +def index(s, *args): + """index(s, sub [,start [,end]]) -> int + + Like find but raises ValueError when the substring is not found. + + """ + return _apply(s.index, args) + +# Find last substring, raise exception if not found +def rindex(s, *args): + """rindex(s, sub [,start [,end]]) -> int + + Like rfind but raises ValueError when the substring is not found. + + """ + return _apply(s.rindex, args) + +# Count non-overlapping occurrences of substring +def count(s, *args): + """count(s, sub[, start[,end]]) -> int + + Return the number of occurrences of substring sub in string + s[start:end]. Optional arguments start and end are + interpreted as in slice notation. + + """ + return _apply(s.count, args) + +# Find substring, return -1 if not found +def find(s, *args): + """find(s, sub [,start [,end]]) -> in + + Return the lowest index in s where substring sub is found, + such that sub is contained within s[start,end]. Optional + arguments start and end are interpreted as in slice notation. + + Return -1 on failure. + + """ + return _apply(s.find, args) + +# Find last substring, return -1 if not found +def rfind(s, *args): + """rfind(s, sub [,start [,end]]) -> int + + Return the highest index in s where substring sub is found, + such that sub is contained within s[start,end]. Optional + arguments start and end are interpreted as in slice notation. + + Return -1 on failure. + + """ + return _apply(s.rfind, args) + +# for a bit of speed +_float = float +_int = int +_long = long +_StringType = type('') + +# Convert string to float +def atof(s): + """atof(s) -> float + + Return the floating point number represented by the string s. 
+ + """ + if type(s) == _StringType: + return _float(s) + else: + raise TypeError('argument 1: expected string, %s found' % + type(s).__name__) + +# Convert string to integer +def atoi(*args): + """atoi(s [,base]) -> int + + Return the integer represented by the string s in the given + base, which defaults to 10. The string s must consist of one + or more digits, possibly preceded by a sign. If base is 0, it + is chosen from the leading characters of s, 0 for octal, 0x or + 0X for hexadecimal. If base is 16, a preceding 0x or 0X is + accepted. + + """ + try: + s = args[0] + except IndexError: + raise TypeError('function requires at least 1 argument: %d given' % + len(args)) + # Don't catch type error resulting from too many arguments to int(). The + # error message isn't compatible but the error type is, and this function + # is complicated enough already. + if type(s) == _StringType: + return _apply(_int, args) + else: + raise TypeError('argument 1: expected string, %s found' % + type(s).__name__) + + +# Convert string to long integer +def atol(*args): + """atol(s [,base]) -> long + + Return the long integer represented by the string s in the + given base, which defaults to 10. The string s must consist + of one or more digits, possibly preceded by a sign. If base + is 0, it is chosen from the leading characters of s, 0 for + octal, 0x or 0X for hexadecimal. If base is 16, a preceding + 0x or 0X is accepted. A trailing L or l is not accepted, + unless base is 0. + + """ + try: + s = args[0] + except IndexError: + raise TypeError('function requires at least 1 argument: %d given' % + len(args)) + # Don't catch type error resulting from too many arguments to long(). The + # error message isn't compatible but the error type is, and this function + # is complicated enough already. 
+ if type(s) == _StringType: + return _apply(_long, args) + else: + raise TypeError('argument 1: expected string, %s found' % + type(s).__name__) + + +# Left-justify a string +def ljust(s, width): + """ljust(s, width) -> string + + Return a left-justified version of s, in a field of the + specified width, padded with spaces as needed. The string is + never truncated. + + """ + n = width - len(s) + if n <= 0: return s + return s + ' '*n + +# Right-justify a string +def rjust(s, width): + """rjust(s, width) -> string + + Return a right-justified version of s, in a field of the + specified width, padded with spaces as needed. The string is + never truncated. + + """ + n = width - len(s) + if n <= 0: return s + return ' '*n + s + +# Center a string +def center(s, width): + """center(s, width) -> string + + Return a center version of s, in a field of the specified + width. padded with spaces as needed. The string is never + truncated. + + """ + n = width - len(s) + if n <= 0: return s + half = n/2 + if n%2 and width%2: + # This ensures that center(center(s, i), j) = center(s, j) + half = half+1 + return ' '*half + s + ' '*(n-half) + +# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03' +# Decadent feature: the argument may be a string or a number +# (Use of this is deprecated; it should be a string as with ljust c.s.) +def zfill(x, width): + """zfill(x, width) -> string + + Pad a numeric string x with zeros on the left, to fill a field + of the specified width. The string x is never truncated. + + """ + if type(x) == type(''): s = x + else: s = `x` + n = len(s) + if n >= width: return s + sign = '' + if s[0] in ('-', '+'): + sign, s = s[0], s[1:] + return sign + '0'*(width-n) + s + +# Expand tabs in a string. +# Doesn't take non-printing chars into account, but does understand \n. 
+def expandtabs(s, tabsize=8): + """expandtabs(s [,tabsize]) -> string + + Return a copy of the string s with all tab characters replaced + by the appropriate number of spaces, depending on the current + column, and the tabsize (default 8). + + """ + res = line = '' + for c in s: + if c == '\t': + c = ' '*(tabsize - len(line) % tabsize) + line = line + c + if c == '\n': + res = res + line + line = '' + return res + line + +# Character translation through look-up table. +def translate(s, table, deletions=""): + """translate(s,table [,deletechars]) -> string + + Return a copy of the string s, where all characters occurring + in the optional argument deletechars are removed, and the + remaining characters have been mapped through the given + translation table, which must be a string of length 256. + + """ + return s.translate(table, deletions) + +# Capitalize a string, e.g. "aBc dEf" -> "Abc def". +def capitalize(s): + """capitalize(s) -> string + + Return a copy of the string s with only its first character + capitalized. + + """ + return s.capitalize() + +# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def". +# See also regsub.capwords(). +def capwords(s, sep=None): + """capwords(s, [sep]) -> string + + Split the argument into words using split, capitalize each + word using capitalize, and join the capitalized words using + join. Note that this replaces runs of whitespace characters by + a single space. + + """ + return join(map(capitalize, s.split(sep)), sep or ' ') + +# Construct a translation string +_idmapL = None +def maketrans(fromstr, tostr): + """maketrans(frm, to) -> string + + Return a translation table (a string of 256 bytes long) + suitable for use in string.translate. The strings frm and to + must be of the same length. 
+ + """ + if len(fromstr) != len(tostr): + raise ValueError, "maketrans arguments must have same length" + global _idmapL + if not _idmapL: + _idmapL = map(None, _idmap) + L = _idmapL[:] + fromstr = map(ord, fromstr) + for i in range(len(fromstr)): + L[fromstr[i]] = tostr[i] + return joinfields(L, "") + +# Substring replacement (global) +def replace(s, old, new, maxsplit=0): + """replace (str, old, new[, maxsplit]) -> string + + Return a copy of string str with all occurrences of substring + old replaced by new. If the optional argument maxsplit is + given, only the first maxsplit occurrences are replaced. + + """ + return s.replace(old, new, maxsplit) + + +# XXX: transitional +# +# If string objects do not have methods, then we need to use the old string.py +# library, which uses strop for many more things than just the few outlined +# below. +try: + ''.upper +except AttributeError: + from stringold import * + +# Try importing optional built-in module "strop" -- if it exists, +# it redefines some string operations that are 100-1000 times faster. +# It also defines values for whitespace, lowercase and uppercase +# that match <ctype.h>'s definitions. + +try: + from strop import maketrans, lowercase, uppercase, whitespace + letters = lowercase + uppercase +except ImportError: + pass # Use the original versions diff --git a/Lib/dos-8x3/telnetli.py b/Lib/dos-8x3/telnetli.py index efb2b40..dfd549e 100644 --- a/Lib/dos-8x3/telnetli.py +++ b/Lib/dos-8x3/telnetli.py @@ -329,6 +329,7 @@ class Telnet: opt = self.rawq_getchar() self.msg('IAC %s %d', c == WILL and 'WILL' or 'WONT', ord(c)) + self.sock.send(IAC + DONT + opt) else: self.msg('IAC %s not recognized' % `c`) except EOFError: # raised by self.rawq_getchar() diff --git a/Lib/dos-8x3/test_bin.py b/Lib/dos-8x3/test_bin.py index fe3843d..28908ef 100644 --- a/Lib/dos-8x3/test_bin.py +++ b/Lib/dos-8x3/test_bin.py @@ -1,46 +1,93 @@ -#! 
/usr/bin/env python -"""Test script for the binascii C module - - Uses the mechanism of the python binhex module - Roger E. Masse -""" -import binhex -import tempfile +"""Test the binascii C module.""" + from test_support import verbose +import binascii + +# Show module doc string +print binascii.__doc__ + +# Show module exceptions +print binascii.Error +print binascii.Incomplete + +# Check presence and display doc strings of all functions +funcs = [] +for suffix in "base64", "hqx", "uu": + prefixes = ["a2b_", "b2a_"] + if suffix == "hqx": + prefixes.extend(["crc_", "rlecode_", "rledecode_"]) + for prefix in prefixes: + name = prefix + suffix + funcs.append(getattr(binascii, name)) +for func in funcs: + print "%-15s: %s" % (func.__name__, func.__doc__) + +# Create binary test data +testdata = "The quick brown fox jumps over the lazy dog.\r\n" +for i in range(256): + # Be slow so we don't depend on other modules + testdata = testdata + chr(i) +testdata = testdata + "\r\nHello world.\n" + +# Test base64 with valid data +print "base64 test" +MAX_BASE64 = 57 +lines = [] +for i in range(0, len(testdata), MAX_BASE64): + b = testdata[i:i+MAX_BASE64] + a = binascii.b2a_base64(b) + lines.append(a) + print a, +res = "" +for line in lines: + b = binascii.a2b_base64(line) + res = res + b +assert res == testdata + +# Test base64 with random invalid characters sprinkled throughout +# (This requires a new version of binascii.) 
+fillers = "" +valid = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/" +for i in range(256): + c = chr(i) + if c not in valid: + fillers = fillers + c +def addnoise(line): + noise = fillers + ratio = len(line) / len(noise) + res = "" + while line and noise: + if len(line) / len(noise) > ratio: + c, line = line[0], line[1:] + else: + c, noise = noise[0], noise[1:] + res = res + c + return res + noise + line +res = "" +for line in map(addnoise, lines): + b = binascii.a2b_base64(line) + res = res + b +assert res == testdata + +# Test uu +print "uu test" +MAX_UU = 45 +lines = [] +for i in range(0, len(testdata), MAX_UU): + b = testdata[i:i+MAX_UU] + a = binascii.b2a_uu(b) + lines.append(a) + print a, +res = "" +for line in lines: + b = binascii.a2b_uu(line) + res = res + b +assert res == testdata + +# Test crc32() +crc = binascii.crc32("Test the CRC-32 of") +crc = binascii.crc32(" this string.", crc) +if crc != 1571220330: + print "binascii.crc32() failed." -def test(): - - try: - fname1 = tempfile.mktemp() - fname2 = tempfile.mktemp() - f = open(fname1, 'w') - except: - raise ImportError, "Cannot test binascii without a temp file" - - start = 'Jack is my hero' - f.write(start) - f.close() - - binhex.binhex(fname1, fname2) - if verbose: - print 'binhex' - - binhex.hexbin(fname2, fname1) - if verbose: - print 'hexbin' - - f = open(fname1, 'r') - finish = f.readline() - - if start <> finish: - print 'Error: binhex <> hexbin' - elif verbose: - print 'binhex == hexbin' - - try: - import os - os.unlink(fname1) - os.unlink(fname2) - except: - pass -test() +# The hqx test is in test_binhex.py diff --git a/Lib/dos-8x3/test_con.py b/Lib/dos-8x3/test_con.py new file mode 100644 index 0000000..355135f --- /dev/null +++ b/Lib/dos-8x3/test_con.py @@ -0,0 +1,168 @@ +from test_support import TestFailed + +class base_set: + + def __init__(self, el): + self.el = el + +class set(base_set): + + def __contains__(self, el): + return self.el == el + +class seq(base_set): 
+ + def __getitem__(self, n): + return [self.el][n] + +def check(ok, *args): + if not ok: + raise TestFailed, " ".join(map(str, args)) + +a = base_set(1) +b = set(1) +c = seq(1) + +check(1 in b, "1 not in set(1)") +check(0 not in b, "0 in set(1)") +check(1 in c, "1 not in seq(1)") +check(0 not in c, "0 in seq(1)") + +try: + 1 in a + check(0, "in base_set did not raise error") +except AttributeError: + pass + +try: + 1 not in a + check(0, "not in base_set did not raise error") +except AttributeError: + pass + +# Test char in string + +check('c' in 'abc', "'c' not in 'abc'") +check('d' not in 'abc', "'d' in 'abc'") + +try: + '' in 'abc' + check(0, "'' in 'abc' did not raise error") +except TypeError: + pass + +try: + 'ab' in 'abc' + check(0, "'ab' in 'abc' did not raise error") +except TypeError: + pass + +try: + None in 'abc' + check(0, "None in 'abc' did not raise error") +except TypeError: + pass + +# Test char in Unicode + +check('c' in u'abc', "'c' not in u'abc'") +check('d' not in u'abc', "'d' in u'abc'") + +try: + '' in u'abc' + check(0, "'' in u'abc' did not raise error") +except TypeError: + pass + +try: + 'ab' in u'abc' + check(0, "'ab' in u'abc' did not raise error") +except TypeError: + pass + +try: + None in u'abc' + check(0, "None in u'abc' did not raise error") +except TypeError: + pass + +# Test Unicode char in Unicode + +check(u'c' in u'abc', "u'c' not in u'abc'") +check(u'd' not in u'abc', "u'd' in u'abc'") + +try: + u'' in u'abc' + check(0, "u'' in u'abc' did not raise error") +except TypeError: + pass + +try: + u'ab' in u'abc' + check(0, "u'ab' in u'abc' did not raise error") +except TypeError: + pass + +# Test Unicode char in string + +check(u'c' in 'abc', "u'c' not in 'abc'") +check(u'd' not in 'abc', "u'd' in 'abc'") + +try: + u'' in 'abc' + check(0, "u'' in 'abc' did not raise error") +except TypeError: + pass + +try: + u'ab' in 'abc' + check(0, "u'ab' in 'abc' did not raise error") +except TypeError: + pass + +# A collection of tests on 
builtin sequence types +a = range(10) +for i in a: + check(i in a, "%s not in %s" % (`i`, `a`)) +check(16 not in a, "16 not in %s" % `a`) +check(a not in a, "%s not in %s" % (`a`, `a`)) + +a = tuple(a) +for i in a: + check(i in a, "%s not in %s" % (`i`, `a`)) +check(16 not in a, "16 not in %s" % `a`) +check(a not in a, "%s not in %s" % (`a`, `a`)) + +class Deviant1: + """Behaves strangely when compared + + This class is designed to make sure that the contains code + works when the list is modified during the check. + """ + + aList = range(15) + + def __cmp__(self, other): + if other == 12: + self.aList.remove(12) + self.aList.remove(13) + self.aList.remove(14) + return 1 + +check(Deviant1() not in Deviant1.aList, "Deviant1 failed") + +class Deviant2: + """Behaves strangely when compared + + This class raises an exception during comparison. That in + turn causes the comparison to fail with a TypeError. + """ + + def __cmp__(self, other): + if other == 4: + raise RuntimeError, "gotcha" + +try: + check(Deviant2() not in a, "oops") +except TypeError: + pass diff --git a/Lib/dos-8x3/test_cpi.py b/Lib/dos-8x3/test_cpi.py index 9088eb7..f5e920f 100644 --- a/Lib/dos-8x3/test_cpi.py +++ b/Lib/dos-8x3/test_cpi.py @@ -79,18 +79,29 @@ def dotest(): f.close() try: cPickle.dump(123, f) - except IOError: + except ValueError: pass else: - print "dump to closed file should raise IOError" + print "dump to closed file should raise ValueError" f = open(fn, "r") f.close() try: cPickle.load(f) - except IOError: + except ValueError: pass else: - print "load from closed file should raise IOError" + print "load from closed file should raise ValueError" os.remove(fn) + # Test specific bad cases + for i in range(10): + try: + x = cPickle.loads('garyp') + except cPickle.BadPickleGet, y: + del y + else: + print "unexpected success!" 
+ break + + dotest() diff --git a/Lib/dos-8x3/test_ext.py b/Lib/dos-8x3/test_ext.py new file mode 100644 index 0000000..6cca199 --- /dev/null +++ b/Lib/dos-8x3/test_ext.py @@ -0,0 +1,143 @@ +from UserList import UserList + +def f(*a, **k): + print a, k + +def g(x, *y, **z): + print x, y, z + +def h(j=1, a=2, h=3): + print j, a, h + +f() +f(1) +f(1, 2) +f(1, 2, 3) + +f(1, 2, 3, *(4, 5)) +f(1, 2, 3, *[4, 5]) +f(1, 2, 3, *UserList([4, 5])) +f(1, 2, 3, **{'a':4, 'b':5}) +f(1, 2, 3, *(4, 5), **{'a':6, 'b':7}) +f(1, 2, 3, x=4, y=5, *(6, 7), **{'a':8, 'b':9}) + +try: + g() +except TypeError, err: + print "TypeError:", err +else: + print "should raise TypeError: not enough arguments; expected 1, got 0" + +try: + g(*()) +except TypeError, err: + print "TypeError:", err +else: + print "should raise TypeError: not enough arguments; expected 1, got 0" + +try: + g(*(), **{}) +except TypeError, err: + print "TypeError:", err +else: + print "should raise TypeError: not enough arguments; expected 1, got 0" + +g(1) +g(1, 2) +g(1, 2, 3) +g(1, 2, 3, *(4, 5)) +class Nothing: pass +try: + g(*Nothing()) +except AttributeError, attr: + pass +else: + print "should raise AttributeError: __len__" + +class Nothing: + def __len__(self): + return 5 +try: + g(*Nothing()) +except AttributeError, attr: + pass +else: + print "should raise AttributeError: __getitem__" + +class Nothing: + def __len__(self): + return 5 + def __getitem__(self, i): + if i < 3: + return i + else: + raise IndexError, i +g(*Nothing()) + +# make sure the function call doesn't stomp on the dictionary? +d = {'a': 1, 'b': 2, 'c': 3} +d2 = d.copy() +assert d == d2 +g(1, d=4, **d) +print d +print d2 +assert d == d2, "function call modified dictionary" + +# what about willful misconduct? 
+def saboteur(**kw): + kw['x'] = locals() +d = {} +saboteur(a=1, **d) +assert d == {} + +try: + g(1, 2, 3, **{'x':4, 'y':5}) +except TypeError, err: + print err +else: + print "should raise TypeError: keyword parameter redefined" + +try: + g(1, 2, 3, a=4, b=5, *(6, 7), **{'a':8, 'b':9}) +except TypeError, err: + print err +else: + print "should raise TypeError: keyword parameter redefined" + +try: + f(**{1:2}) +except TypeError, err: + print err +else: + print "should raise TypeError: keywords must be strings" + +try: + h(**{'e': 2}) +except TypeError, err: + print err +else: + print "should raise TypeError: unexpected keyword argument: e" + +try: + h(*h) +except TypeError, err: + print err +else: + print "should raise TypeError: * argument must be a tuple" + +try: + h(**h) +except TypeError, err: + print err +else: + print "should raise TypeError: ** argument must be a dictionary" + +def f2(*a, **b): + return a, b + +d = {} +for i in range(512): + key = 'k%d' % i + d[key] = i +a, b = f2(1, *(2, 3), **d) +print len(a), len(b), b == d diff --git a/Lib/dos-8x3/test_fcn.py b/Lib/dos-8x3/test_fcn.py index 10144c3..b59efda 100644 --- a/Lib/dos-8x3/test_fcn.py +++ b/Lib/dos-8x3/test_fcn.py @@ -18,7 +18,8 @@ if verbose: if sys.platform in ('netbsd1', 'freebsd2', 'freebsd3', - 'bsdos2', 'bsdos3', 'bsdos4'): + 'bsdos2', 'bsdos3', 'bsdos4', + 'openbsd', 'openbsd2'): lockdata = struct.pack('lxxxxlxxxxlhh', 0, 0, 0, FCNTL.F_WRLCK, 0) elif sys.platform in ['aix3', 'aix4']: lockdata = struct.pack('hhlllii', FCNTL.F_WRLCK, 0, 0, 0, 0, 0, 0) diff --git a/Lib/dos-8x3/test_for.py b/Lib/dos-8x3/test_for.py new file mode 100644 index 0000000..67e30df --- /dev/null +++ b/Lib/dos-8x3/test_for.py @@ -0,0 +1,68 @@ +"""This test checks for correct fork() behavior. + +We want fork1() semantics -- only the forking thread survives in the +child after a fork(). + +On some systems (e.g. 
Solaris without posix threads) we find that all +active threads survive in the child after a fork(); this is an error. + +""" + +import os, sys, time, thread + +try: + os.fork +except AttributeError: + raise ImportError, "os.fork not defined -- skipping test_fork1" + +LONGSLEEP = 2 + +SHORTSLEEP = 0.5 + +NUM_THREADS = 4 + +alive = {} + +stop = 0 + +def f(id): + while not stop: + alive[id] = os.getpid() + try: + time.sleep(SHORTSLEEP) + except IOError: + pass + +def main(): + for i in range(NUM_THREADS): + thread.start_new(f, (i,)) + + time.sleep(LONGSLEEP) + + a = alive.keys() + a.sort() + assert a == range(NUM_THREADS) + + prefork_lives = alive.copy() + + cpid = os.fork() + + if cpid == 0: + # Child + time.sleep(LONGSLEEP) + n = 0 + for key in alive.keys(): + if alive[key] != prefork_lives[key]: + n = n+1 + os._exit(n) + else: + # Parent + spid, status = os.waitpid(cpid, 0) + assert spid == cpid + assert status == 0, "cause = %d, exit = %d" % (status&0xff, status>>8) + global stop + # Tell threads to die + stop = 1 + time.sleep(2*SHORTSLEEP) # Wait for threads to die + +main() diff --git a/Lib/dos-8x3/test_gdb.py b/Lib/dos-8x3/test_gdb.py index 22db6aa..030218e 100644 --- a/Lib/dos-8x3/test_gdb.py +++ b/Lib/dos-8x3/test_gdb.py @@ -5,7 +5,7 @@ import gdbm from gdbm import error -from test_support import verbose +from test_support import verbose, TestFailed filename= '/tmp/delete_me' @@ -18,6 +18,12 @@ if verbose: g.has_key('a') g.close() +try: + g['a'] +except error: + pass +else: + raise TestFailed, "expected gdbm.error accessing closed database" g = gdbm.open(filename, 'r') g.close() g = gdbm.open(filename, 'rw') diff --git a/Lib/dos-8x3/test_gra.py b/Lib/dos-8x3/test_gra.py index 21012d2..fa09e8c 100755 --- a/Lib/dos-8x3/test_gra.py +++ b/Lib/dos-8x3/test_gra.py @@ -140,11 +140,17 @@ x = eval('1, 0 or 1') print 'funcdef' ### 'def' NAME parameters ':' suite ### parameters: '(' [varargslist] ')' -### varargslist: (fpdef ['=' test] ',')* '*' NAME -### | fpdef ['=' 
test] (',' fpdef ['=' test])* [','] +### varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' ('**'|'*' '*') NAME] +### | ('**'|'*' '*') NAME) +### | fpdef ['=' test] (',' fpdef ['=' test])* [','] ### fpdef: NAME | '(' fplist ')' ### fplist: fpdef (',' fpdef)* [','] +### arglist: (argument ',')* (argument | *' test [',' '**' test] | '**' test) +### argument: [test '='] test # Really [keyword '='] test def f1(): pass +f1() +f1(*()) +f1(*(), **{}) def f2(one_argument): pass def f3(two, arguments): pass def f4(two, (compound, (argument, list))): pass @@ -157,16 +163,27 @@ def v3(a, (b, c), *rest): pass def d01(a=1): pass d01() d01(1) +d01(*(1,)) +d01(**{'a':2}) def d11(a, b=1): pass d11(1) d11(1, 2) +d11(1, **{'b':2}) def d21(a, b, c=1): pass d21(1, 2) d21(1, 2, 3) +d21(*(1, 2, 3)) +d21(1, *(2, 3)) +d21(1, 2, *(3,)) +d21(1, 2, **{'c':3}) def d02(a=1, b=2): pass d02() d02(1) d02(1, 2) +d02(*(1, 2)) +d02(1, *(2,)) +d02(1, **{'b':2}) +d02(**{'a': 1, 'b': 2}) def d12(a, b=1, c=2): pass d12(1) d12(1, 2) @@ -179,6 +196,9 @@ def d01v(a=1, *rest): pass d01v() d01v(1) d01v(1, 2) +d01v(*(1, 2, 3, 4)) +d01v(*(1,)) +d01v(**{'a':2}) def d11v(a, b=1, *rest): pass d11v(1) d11v(1, 2) @@ -187,21 +207,31 @@ def d21v(a, b, c=1, *rest): pass d21v(1, 2) d21v(1, 2, 3) d21v(1, 2, 3, 4) +d21v(*(1, 2, 3, 4)) +d21v(1, 2, **{'c': 3}) def d02v(a=1, b=2, *rest): pass d02v() d02v(1) d02v(1, 2) d02v(1, 2, 3) +d02v(1, *(2, 3, 4)) +d02v(**{'a': 1, 'b': 2}) def d12v(a, b=1, c=2, *rest): pass d12v(1) d12v(1, 2) d12v(1, 2, 3) d12v(1, 2, 3, 4) +d12v(*(1, 2, 3, 4)) +d12v(1, 2, *(3, 4, 5)) +d12v(1, *(2,), **{'c': 3}) def d22v(a, b, c=1, d=2, *rest): pass d22v(1, 2) d22v(1, 2, 3) d22v(1, 2, 3, 4) d22v(1, 2, 3, 4, 5) +d22v(*(1, 2, 3, 4)) +d22v(1, 2, *(3, 4, 5)) +d22v(1, *(2, 3), **{'d': 4}) ### stmt: simple_stmt | compound_stmt # Tested below @@ -455,6 +485,7 @@ v2(1,2,3,4,5,6,7,8,9,0) v3(1,(2,3)) v3(1,(2,3),4) v3(1,(2,3),4,5,6,7,8,9,0) +print import sys, time c = sys.path[0] x = time.time() diff --git 
a/Lib/dos-8x3/test_lon.py b/Lib/dos-8x3/test_lon.py index f235d9c..6a1f117 100644 --- a/Lib/dos-8x3/test_lon.py +++ b/Lib/dos-8x3/test_lon.py @@ -77,6 +77,8 @@ def getran2(ndigits): def test_division_2(x, y): q, r = divmod(x, y) q2, r2 = x/y, x%y + pab, pba = x*y, y*x + check(pab == pba, "multiplication does not commute for", x, y) check(q == q2, "divmod returns different quotient than / for", x, y) check(r == r2, "divmod returns different mod than % for", x, y) check(x == q*y + r, "x != q*y + r after divmod on", x, y) @@ -159,7 +161,7 @@ def test_bitop_identities(maxdigits=MAXDIGITS): test_bitop_identities_2(x, y) test_bitop_identities_3(x, y, getran((lenx + leny)/2)) -# ------------------------------------------------------ hex oct str atol +# ------------------------------------------------- hex oct repr str atol def slow_format(x, base): if (x, base) == (0, 8): @@ -181,12 +183,18 @@ def slow_format(x, base): def test_format_1(x): from string import atol - for base, mapper in (8, oct), (10, str), (16, hex): + for base, mapper in (8, oct), (10, repr), (16, hex): got = mapper(x) expected = slow_format(x, base) check(got == expected, mapper.__name__, "returned", got, "but expected", expected, "for", x) check(atol(got, 0) == x, 'atol("%s", 0) !=' % got, x) + # str() has to be checked a little differently since there's no + # trailing "L" + got = str(x) + expected = slow_format(x, 10)[:-1] + check(got == expected, mapper.__name__, "returned", + got, "but expected", expected, "for", x) def test_format(maxdigits=MAXDIGITS): print "long str/hex/oct/atol" diff --git a/Lib/dos-8x3/test_mma.py b/Lib/dos-8x3/test_mma.py new file mode 100644 index 0000000..e5da187 --- /dev/null +++ b/Lib/dos-8x3/test_mma.py @@ -0,0 +1,67 @@ + +import mmap +import string, os, re, sys + +PAGESIZE = mmap.PAGESIZE + +def test_both(): + "Test mmap module on Unix systems and Windows" + + # Create an mmap'ed file + f = open('foo', 'w+') + + # Write 2 pages worth of data to the file + f.write('\0'* 
PAGESIZE) + f.write('foo') + f.write('\0'* (PAGESIZE-3) ) + + m = mmap.mmap(f.fileno(), 2 * PAGESIZE) + f.close() + + # Simple sanity checks + print ' Position of foo:', string.find(m, 'foo') / float(PAGESIZE), 'pages' + assert string.find(m, 'foo') == PAGESIZE + + print ' Length of file:', len(m) / float(PAGESIZE), 'pages' + assert len(m) == 2*PAGESIZE + + print ' Contents of byte 0:', repr(m[0]) + assert m[0] == '\0' + print ' Contents of first 3 bytes:', repr(m[0:3]) + assert m[0:3] == '\0\0\0' + + # Modify the file's content + print "\n Modifying file's content..." + m[0] = '3' + m[PAGESIZE +3: PAGESIZE +3+3]='bar' + + # Check that the modification worked + print ' Contents of byte 0:', repr(m[0]) + assert m[0] == '3' + print ' Contents of first 3 bytes:', repr(m[0:3]) + assert m[0:3] == '3\0\0' + print ' Contents of second page:', m[PAGESIZE-1 : PAGESIZE + 7] + assert m[PAGESIZE-1 : PAGESIZE + 7] == '\0foobar\0' + + m.flush() + + # Test doing a regular expression match in an mmap'ed file + match=re.search('[A-Za-z]+', m) + if match == None: + print ' ERROR: regex match on mmap failed!' + else: + start, end = match.span(0) + length = end - start + + print ' Regex match on mmap (page start, length of match):', + print start / float(PAGESIZE), length + + assert start == PAGESIZE + assert end == PAGESIZE + 6 + + m.close() + os.unlink("foo") + print ' Test passed' + +test_both() + diff --git a/Lib/dos-8x3/test_pye.py b/Lib/dos-8x3/test_pye.py new file mode 100644 index 0000000..9f6d8d0 --- /dev/null +++ b/Lib/dos-8x3/test_pye.py @@ -0,0 +1,107 @@ +# Very simple test - Parse a file and print what happens + +# XXX TypeErrors on calling handlers, or on bad return values from a +# handler, are obscure and unhelpful. 
+ +import sys, string +import os + +import pyexpat + +class Outputter: + def StartElementHandler(self, name, attrs): + print 'Start element:\n\t', name, attrs + + def EndElementHandler(self, name): + print 'End element:\n\t', name + + def CharacterDataHandler(self, data): + data = string.strip(data) + if data: + print 'Character data:' + print '\t', repr(data) + + def ProcessingInstructionHandler(self, target, data): + print 'PI:\n\t', target, data + + def StartNamespaceDeclHandler(self, prefix, uri): + print 'NS decl:\n\t', prefix, uri + + def EndNamespaceDeclHandler(self, prefix): + print 'End of NS decl:\n\t', prefix + + def StartCdataSectionHandler(self): + print 'Start of CDATA section' + + def EndCdataSectionHandler(self): + print 'End of CDATA section' + + def CommentHandler(self, text): + print 'Comment:\n\t', repr(text) + + def NotationDeclHandler(self, *args): + name, base, sysid, pubid = args + print 'Notation declared:', args + + def UnparsedEntityDeclHandler(self, *args): + entityName, base, systemId, publicId, notationName = args + print 'Unparsed entity decl:\n\t', args + + def NotStandaloneHandler(self, userData): + print 'Not standalone' + return 1 + + def ExternalEntityRefHandler(self, context, base, sysId, pubId): + print 'External entity ref:', context, base, sysId, pubId + return 1 + + def DefaultHandler(self, userData): + pass + + def DefaultHandlerExpand(self, userData): + pass + + +out = Outputter() +parser = pyexpat.ParserCreate(namespace_separator='!') +for name in ['StartElementHandler', 'EndElementHandler', + 'CharacterDataHandler', 'ProcessingInstructionHandler', + 'UnparsedEntityDeclHandler', 'NotationDeclHandler', + 'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler', + 'CommentHandler', 'StartCdataSectionHandler', + 'EndCdataSectionHandler', + 'DefaultHandler', 'DefaultHandlerExpand', + #'NotStandaloneHandler', + 'ExternalEntityRefHandler' + ]: + setattr(parser, name, getattr(out, name) ) + +data = """<?xml version="1.0" 
encoding="iso-8859-1" standalone="no"?> +<?xml-stylesheet href="stylesheet.css"?> +<!-- comment data --> +<!DOCTYPE quotations SYSTEM "quotations.dtd" [ +<!ELEMENT root ANY> +<!NOTATION notation SYSTEM "notation.jpeg"> +<!ENTITY acirc "â"> +<!ENTITY external_entity SYSTEM "entity.file"> +<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation> +%unparsed_entity; +]> + +<root> +<myns:subelement xmlns:myns="http://www.python.org/namespace"> + Contents of subelements +</myns:subelement> +<sub2><![CDATA[contents of CDATA section]]></sub2> +&external_entity; +</root> +""" + +try: + parser.Parse(data, 1) +except pyexpat.error: + print '** Error', parser.ErrorCode, pyexpat.ErrorString( parser.ErrorCode) + print '** Line', parser.ErrorLineNumber + print '** Column', parser.ErrorColumnNumber + print '** Byte', parser.ErrorByteIndex + diff --git a/Lib/dos-8x3/test_rfc.py b/Lib/dos-8x3/test_rfc.py index b8ae8c7..0d4c66f 100644 --- a/Lib/dos-8x3/test_rfc.py +++ b/Lib/dos-8x3/test_rfc.py @@ -11,6 +11,7 @@ def test(msg, results): fp.seek(0) m = rfc822.Message(fp) i = 0 + for n, a in m.getaddrlist('to') + m.getaddrlist('cc'): if verbose: print 'name:', repr(n), 'addr:', repr(a) @@ -28,6 +29,21 @@ def test(msg, results): print ' [no match]' print 'not found:', repr(n), repr(a) + out = m.getdate('date') + if out: + if verbose: + print 'Date:', m.getheader('date') + if out == (1999, 1, 13, 23, 57, 35, 0, 0, 0): + if verbose: + print ' [matched]' + else: + if verbose: + print ' [no match]' + print 'Date conversion failed:', out + +# Note: all test cases must have the same date (in various formats), +# or no date! 
+ test('''Date: Wed, 13 Jan 1999 23:57:35 -0500 From: Guido van Rossum <guido@CNRI.Reston.VA.US> To: "Guido van @@ -40,6 +56,7 @@ test2 test('''From: Barry <bwarsaw@python.org To: guido@python.org (Guido: the Barbarian) Subject: nonsense +Date: Wednesday, January 13 1999 23:57:35 -0500 test''', [('Guido: the Barbarian', 'guido@python.org'), ]) @@ -47,6 +64,7 @@ test''', [('Guido: the Barbarian', 'guido@python.org'), test('''From: Barry <bwarsaw@python.org To: guido@python.org (Guido: the Barbarian) Cc: "Guido: the Madman" <guido@python.org> +Date: 13-Jan-1999 23:57:35 EST test''', [('Guido: the Barbarian', 'guido@python.org'), ('Guido: the Madman', 'guido@python.org') @@ -54,6 +72,7 @@ test''', [('Guido: the Barbarian', 'guido@python.org'), test('''To: "The monster with the very long name: Guido" <guido@python.org> +Date: Wed, 13 Jan 1999 23:57:35 -0500 test''', [('The monster with\n the very long name: Guido', 'guido@python.org')]) @@ -63,6 +82,7 @@ CC: Mike Fletcher <mfletch@vrtelecom.com>, "'string-sig@python.org'" <string-sig@python.org> Cc: fooz@bat.com, bart@toof.com Cc: goit@lip.com +Date: Wed, 13 Jan 1999 23:57:35 -0500 test''', [('Amit J. Patel', 'amitp@Theory.Stanford.EDU'), ('Mike Fletcher', 'mfletch@vrtelecom.com'), @@ -75,8 +95,28 @@ test''', [('Amit J. Patel', 'amitp@Theory.Stanford.EDU'), # This one is just twisted. I don't know what the proper result should be, # but it shouldn't be to infloop, which is what used to happen! test('''To: <[smtp:dd47@mail.xxx.edu]_at_hmhq@hdq-mdm1-imgout.companay.com> +Date: Wed, 13 Jan 1999 23:57:35 -0500 test''', [('', ''), ('', 'dd47@mail.xxx.edu'), ('', '_at_hmhq@hdq-mdm1-imgout.companay.com') ]) + +# This exercises the old commas-in-a-full-name bug, which should be doing the +# right thing in recent versions of the module. 
+test('''To: "last, first" <userid@foo.net> + +test''', [('last, first', 'userid@foo.net'), + ]) + +test('''To: (Comment stuff) "Quoted name"@somewhere.com + +test''', [('Comment stuff', '"Quoted name"@somewhere.com'), + ]) + +test('''To: : +Cc: goit@lip.com +Date: Wed, 13 Jan 1999 23:57:35 -0500 + +test''', [('', 'goit@lip.com')]) + diff --git a/Lib/dos-8x3/test_soc.py b/Lib/dos-8x3/test_soc.py index 772b86b..83135f9 100644 --- a/Lib/dos-8x3/test_soc.py +++ b/Lib/dos-8x3/test_soc.py @@ -97,7 +97,7 @@ try: if not canfork or os.fork(): # parent is server s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - s.bind(hostname, PORT) + s.bind((hostname, PORT)) s.listen(1) if verbose: print 'parent accepting' @@ -133,7 +133,7 @@ try: s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) if verbose: print 'child connecting' - s.connect(hostname, PORT) + s.connect((hostname, PORT)) msg = 'socket test' s.send(msg) data = s.recv(1024) diff --git a/Lib/dos-8x3/test_typ.py b/Lib/dos-8x3/test_typ.py index 072e6d2..e22b0e2 100755 --- a/Lib/dos-8x3/test_typ.py +++ b/Lib/dos-8x3/test_typ.py @@ -130,7 +130,9 @@ if len([1,]) <> 1: raise TestFailed, 'len([1,])' if len([1,2,3,4,5,6]) <> 6: raise TestFailed, 'len([1,2,3,4,5,6])' if [1,2]+[3,4] <> [1,2,3,4]: raise TestFailed, 'list concatenation' if [1,2]*3 <> [1,2,1,2,1,2]: raise TestFailed, 'list repetition *3' +if [1,2]*3L <> [1,2,1,2,1,2]: raise TestFailed, 'list repetition *3L' if 0*[1,2,3] <> []: raise TestFailed, 'list repetition 0*' +if 0L*[1,2,3] <> []: raise TestFailed, 'list repetition 0L*' if min([1,2]) <> 1 or max([1,2]) <> 2: raise TestFailed, 'min/max list' if 0 in [0,1,2] and 1 in [0,1,2] and 2 in [0,1,2] and 3 not in [0,1,2]: pass else: raise TestFailed, 'in/not in list' @@ -150,10 +152,17 @@ if a != [1, 1, 2, 3, 4, 5, 5]: print '6.5.3a Additional list operations' a = [0,1,2,3,4] +a[0L] = 1 +a[1L] = 2 +a[2L] = 3 +if a <> [1,2,3,3,4]: raise TestFailed, 'list item assignment [0L], [1L], [2L]' a[0] = 5 a[1] = 6 a[2] = 7 
if a <> [5,6,7,3,4]: raise TestFailed, 'list item assignment [0], [1], [2]' +a[-2L] = 88 +a[-1L] = 99 +if a <> [5,6,7,88,99]: raise TestFailed, 'list item assignment [-2L], [-1L]' a[-2] = 8 a[-1] = 9 if a <> [5,6,7,8,9]: raise TestFailed, 'list item assignment [-2], [-1]' @@ -161,12 +170,21 @@ a[:2] = [0,4] a[-3:] = [] a[1:1] = [1,2,3] if a <> [0,1,2,3,4]: raise TestFailed, 'list slice assignment' +a[ 1L : 4L] = [7,8,9] +if a <> [0,7,8,9,4]: raise TestFailed, 'list slice assignment using long ints' del a[1:4] if a <> [0,4]: raise TestFailed, 'list slice deletion' del a[0] if a <> [4]: raise TestFailed, 'list item deletion [0]' del a[-1] if a <> []: raise TestFailed, 'list item deletion [-1]' +a=range(0,5) +del a[1L:4L] +if a <> [0,4]: raise TestFailed, 'list slice deletion' +del a[0L] +if a <> [4]: raise TestFailed, 'list item deletion [0]' +del a[-1L] +if a <> []: raise TestFailed, 'list item deletion [-1]' a.append(0) a.append(1) a.append(2) @@ -192,6 +210,13 @@ def myComparison(x,y): z = range(12) z.sort(myComparison) +# Test extreme cases with long ints +a = [0,1,2,3,4] +if a[ -pow(2,128L): 3 ] != [0,1,2]: + raise TestFailed, "list slicing with too-small long integer" +if a[ 3: pow(2,145L) ] != [3,4]: + raise TestFailed, "list slicing with too-large long integer" + print '6.6 Mappings == Dictionaries' d = {} if d.keys() <> []: raise TestFailed, '{}.keys()' diff --git a/Lib/dos-8x3/test_uni.py b/Lib/dos-8x3/test_uni.py new file mode 100644 index 0000000..45449ee --- /dev/null +++ b/Lib/dos-8x3/test_uni.py @@ -0,0 +1,401 @@ +""" Test script for the Unicode implementation. + +Written by Marc-Andre Lemburg (mal@lemburg.com). + +(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. + +""" +from test_support import verbose +import sys + +def test(method, input, output, *args): + if verbose: + print '%s.%s%s =? %s... 
' % (repr(input), method, args, output), + try: + f = getattr(input, method) + value = apply(f, args) + except: + value = sys.exc_type + exc = sys.exc_info()[:2] + else: + exc = None + if value != output: + if verbose: + print 'no' + print '*',f, `input`, `output`, `value` + if exc: + print ' value == %s: %s' % (exc) + else: + if verbose: + print 'yes' + +test('capitalize', u' hello ', u' hello ') +test('capitalize', u'hello ', u'Hello ') + +test('title', u' hello ', u' Hello ') +test('title', u'hello ', u'Hello ') +test('title', u"fOrMaT thIs aS titLe String", u'Format This As Title String') +test('title', u"fOrMaT,thIs-aS*titLe;String", u'Format,This-As*Title;String') +test('title', u"getInt", u'Getint') + +test('find', u'abcdefghiabc', 0, u'abc') +test('find', u'abcdefghiabc', 9, u'abc', 1) +test('find', u'abcdefghiabc', -1, u'def', 4) + +test('rfind', u'abcdefghiabc', 9, u'abc') + +test('lower', u'HeLLo', u'hello') +test('lower', u'hello', u'hello') + +test('upper', u'HeLLo', u'HELLO') +test('upper', u'HELLO', u'HELLO') + +if 0: + transtable = '\000\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037 !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`xyzdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377' + + test('maketrans', u'abc', transtable, u'xyz') + test('maketrans', u'abc', ValueError, u'xyzq') + +test('split', u'this is the split function', + [u'this', u'is', u'the', u'split', 
u'function']) +test('split', u'a|b|c|d', [u'a', u'b', u'c', u'd'], u'|') +test('split', u'a|b|c|d', [u'a', u'b', u'c|d'], u'|', 2) +test('split', u'a b c d', [u'a', u'b c d'], None, 1) +test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2) +test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 3) +test('split', u'a b c d', [u'a', u'b', u'c', u'd'], None, 4) +test('split', u'a b c d', [u'a b c d'], None, 0) +test('split', u'a b c d', [u'a', u'b', u'c d'], None, 2) +test('split', u'a b c d ', [u'a', u'b', u'c', u'd']) + +# join now works with any sequence type +class Sequence: + def __init__(self): self.seq = 'wxyz' + def __len__(self): return len(self.seq) + def __getitem__(self, i): return self.seq[i] + +test('join', u' ', u'a b c d', [u'a', u'b', u'c', u'd']) +test('join', u'', u'abcd', (u'a', u'b', u'c', u'd')) +test('join', u' ', u'w x y z', Sequence()) +test('join', u' ', TypeError, 7) + +class BadSeq(Sequence): + def __init__(self): self.seq = [7, u'hello', 123L] + +test('join', u' ', TypeError, BadSeq()) + +result = u'' +for i in range(10): + if i > 0: + result = result + u':' + result = result + u'x'*10 +test('join', u':', result, [u'x' * 10] * 10) +test('join', u':', result, (u'x' * 10,) * 10) + +test('strip', u' hello ', u'hello') +test('lstrip', u' hello ', u'hello ') +test('rstrip', u' hello ', u' hello') +test('strip', u'hello', u'hello') + +test('swapcase', u'HeLLo cOmpUteRs', u'hEllO CoMPuTErS') + +if 0: + test('translate', u'xyzabcdef', u'xyzxyz', transtable, u'def') + + table = string.maketrans('a', u'A') + test('translate', u'abc', u'Abc', table) + test('translate', u'xyz', u'xyz', table) + +test('replace', u'one!two!three!', u'one@two!three!', u'!', u'@', 1) +test('replace', u'one!two!three!', u'onetwothree', '!', '') +test('replace', u'one!two!three!', u'one@two@three!', u'!', u'@', 2) +test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 3) +test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@', 4) +test('replace', 
u'one!two!three!', u'one!two!three!', u'!', u'@', 0) +test('replace', u'one!two!three!', u'one@two@three@', u'!', u'@') +test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@') +test('replace', u'one!two!three!', u'one!two!three!', u'x', u'@', 2) + +test('startswith', u'hello', 1, u'he') +test('startswith', u'hello', 1, u'hello') +test('startswith', u'hello', 0, u'hello world') +test('startswith', u'hello', 1, u'') +test('startswith', u'hello', 0, u'ello') +test('startswith', u'hello', 1, u'ello', 1) +test('startswith', u'hello', 1, u'o', 4) +test('startswith', u'hello', 0, u'o', 5) +test('startswith', u'hello', 1, u'', 5) +test('startswith', u'hello', 0, u'lo', 6) +test('startswith', u'helloworld', 1, u'lowo', 3) +test('startswith', u'helloworld', 1, u'lowo', 3, 7) +test('startswith', u'helloworld', 0, u'lowo', 3, 6) + +test('endswith', u'hello', 1, u'lo') +test('endswith', u'hello', 0, u'he') +test('endswith', u'hello', 1, u'') +test('endswith', u'hello', 0, u'hello world') +test('endswith', u'helloworld', 0, u'worl') +test('endswith', u'helloworld', 1, u'worl', 3, 9) +test('endswith', u'helloworld', 1, u'world', 3, 12) +test('endswith', u'helloworld', 1, u'lowo', 1, 7) +test('endswith', u'helloworld', 1, u'lowo', 2, 7) +test('endswith', u'helloworld', 1, u'lowo', 3, 7) +test('endswith', u'helloworld', 0, u'lowo', 4, 7) +test('endswith', u'helloworld', 0, u'lowo', 3, 8) +test('endswith', u'ab', 0, u'ab', 0, 1) +test('endswith', u'ab', 0, u'ab', 0, 0) + +test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi') +test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 8) +test('expandtabs', u'abc\rab\tdef\ng\thi', u'abc\rab def\ng hi', 4) +test('expandtabs', u'abc\r\nab\tdef\ng\thi', u'abc\r\nab def\ng hi', 4) + +if 0: + test('capwords', u'abc def ghi', u'Abc Def Ghi') + test('capwords', u'abc\tdef\nghi', u'Abc Def Ghi') + test('capwords', u'abc\t def \nghi', u'Abc Def Ghi') + +# Comparisons: +print 'Testing Unicode comparisons...', 
+assert u'abc' == 'abc' +assert 'abc' == u'abc' +assert u'abc' == u'abc' +assert u'abcd' > 'abc' +assert 'abcd' > u'abc' +assert u'abcd' > u'abc' +assert u'abc' < 'abcd' +assert 'abc' < u'abcd' +assert u'abc' < u'abcd' +print 'done.' + +test('ljust', u'abc', u'abc ', 10) +test('rjust', u'abc', u' abc', 10) +test('center', u'abc', u' abc ', 10) +test('ljust', u'abc', u'abc ', 6) +test('rjust', u'abc', u' abc', 6) +test('center', u'abc', u' abc ', 6) +test('ljust', u'abc', u'abc', 2) +test('rjust', u'abc', u'abc', 2) +test('center', u'abc', u'abc', 2) + +test('islower', u'a', 1) +test('islower', u'A', 0) +test('islower', u'\n', 0) +test('islower', u'\u1FFc', 0) +test('islower', u'abc', 1) +test('islower', u'aBc', 0) +test('islower', u'abc\n', 1) + +test('isupper', u'a', 0) +test('isupper', u'A', 1) +test('isupper', u'\n', 0) +test('isupper', u'\u1FFc', 0) +test('isupper', u'ABC', 1) +test('isupper', u'AbC', 0) +test('isupper', u'ABC\n', 1) + +test('istitle', u'a', 0) +test('istitle', u'A', 1) +test('istitle', u'\n', 0) +test('istitle', u'\u1FFc', 1) +test('istitle', u'A Titlecased Line', 1) +test('istitle', u'A\nTitlecased Line', 1) +test('istitle', u'A Titlecased, Line', 1) +test('istitle', u'Greek \u1FFcitlecases ...', 1) +test('istitle', u'Not a capitalized String', 0) +test('istitle', u'Not\ta Titlecase String', 0) +test('istitle', u'Not--a Titlecase String', 0) + +test('splitlines', u"abc\ndef\n\rghi", [u'abc', u'def', u'', u'ghi']) +test('splitlines', u"abc\ndef\n\r\nghi", [u'abc', u'def', u'', u'ghi']) +test('splitlines', u"abc\ndef\r\nghi", [u'abc', u'def', u'ghi']) +test('splitlines', u"abc\ndef\r\nghi\n", [u'abc', u'def', u'ghi']) +test('splitlines', u"abc\ndef\r\nghi\n\r", [u'abc', u'def', u'ghi', u'']) +test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'', u'abc', u'def', u'ghi', u'']) +test('splitlines', u"\nabc\ndef\r\nghi\n\r", [u'\n', u'abc\n', u'def\r\n', u'ghi\n', u'\r'], 1) + +test('translate', u"abababc", u'bbbc', {ord('a'):None}) +test('translate', 
u"abababc", u'iiic', {ord('a'):None, ord('b'):ord('i')}) +test('translate', u"abababc", u'iiix', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'}) + +# Contains: +print 'Testing Unicode contains method...', +assert ('a' in u'abdb') == 1 +assert ('a' in u'bdab') == 1 +assert ('a' in u'bdaba') == 1 +assert ('a' in u'bdba') == 1 +assert ('a' in u'bdba') == 1 +assert (u'a' in u'bdba') == 1 +assert (u'a' in u'bdb') == 0 +assert (u'a' in 'bdb') == 0 +assert (u'a' in 'bdba') == 1 +assert (u'a' in ('a',1,None)) == 1 +assert (u'a' in (1,None,'a')) == 1 +assert (u'a' in (1,None,u'a')) == 1 +assert ('a' in ('a',1,None)) == 1 +assert ('a' in (1,None,'a')) == 1 +assert ('a' in (1,None,u'a')) == 1 +assert ('a' in ('x',1,u'y')) == 0 +assert ('a' in ('x',1,None)) == 0 +print 'done.' + +# Formatting: +print 'Testing Unicode formatting strings...', +assert u"%s, %s" % (u"abc", "abc") == u'abc, abc' +assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3) == u'abc, abc, 1, 2.000000, 3.00' +assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3) == u'abc, abc, 1, -2.000000, 3.00' +assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5) == u'abc, abc, -1, -2.000000, 3.50' +assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57) == u'abc, abc, -1, -2.000000, 3.57' +assert u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57) == u'abc, abc, -1, -2.000000, 1003.57' +assert u"%c" % (u"abc",) == u'a' +assert u"%c" % ("abc",) == u'a' +assert u"%c" % (34,) == u'"' +assert u"%c" % (36,) == u'$' +assert u"%r, %r" % (u"abc", "abc") == u"u'abc', 'abc'" +assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def' +assert u"%(x)s, %(ä)s" % {'x':u"abc", u'ä'.encode('utf-8'):"def"} == u'abc, def' +# formatting jobs delegated from the string implementation: +assert '...%(foo)s...' % {'foo':u"abc"} == u'...abc...' +assert '...%(foo)s...' % {'foo':"abc"} == '...abc...' +assert '...%(foo)s...' % {u'foo':"abc"} == '...abc...' +assert '...%(foo)s...' 
% {u'foo':u"abc"} == u'...abc...' +assert '...%(foo)s...' % {u'foo':u"abc",'def':123} == u'...abc...' +assert '...%(foo)s...' % {u'foo':u"abc",u'def':123} == u'...abc...' +assert '...%s...%s...%s...%s...' % (1,2,3,u"abc") == u'...1...2...3...abc...' +assert '...%s...' % u"abc" == u'...abc...' +print 'done.' + +# Test builtin codecs +print 'Testing builtin codecs...', + +assert unicode('hello','ascii') == u'hello' +assert unicode('hello','utf-8') == u'hello' +assert unicode('hello','utf8') == u'hello' +assert unicode('hello','latin-1') == u'hello' + +try: + u'Andr\202 x'.encode('ascii') + u'Andr\202 x'.encode('ascii','strict') +except ValueError: + pass +else: + raise AssertionError, "u'Andr\202'.encode('ascii') failed to raise an exception" +assert u'Andr\202 x'.encode('ascii','ignore') == "Andr x" +assert u'Andr\202 x'.encode('ascii','replace') == "Andr? x" + +try: + unicode('Andr\202 x','ascii') + unicode('Andr\202 x','ascii','strict') +except ValueError: + pass +else: + raise AssertionError, "unicode('Andr\202') failed to raise an exception" +assert unicode('Andr\202 x','ascii','ignore') == u"Andr x" +assert unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x' + +assert u'hello'.encode('ascii') == 'hello' +assert u'hello'.encode('utf-8') == 'hello' +assert u'hello'.encode('utf8') == 'hello' +assert u'hello'.encode('utf-16-le') == 'h\000e\000l\000l\000o\000' +assert u'hello'.encode('utf-16-be') == '\000h\000e\000l\000l\000o' +assert u'hello'.encode('latin-1') == 'hello' + +u = u''.join(map(unichr, range(1024))) +for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be', + 'raw_unicode_escape', 'unicode_escape', 'unicode_internal'): + assert unicode(u.encode(encoding),encoding) == u + +u = u''.join(map(unichr, range(256))) +for encoding in ( + 'latin-1', + ): + try: + assert unicode(u.encode(encoding),encoding) == u + except AssertionError: + print '*** codec "%s" failed round-trip' % encoding + except ValueError,why: + print '*** codec for "%s" failed: 
%s' % (encoding, why) + +u = u''.join(map(unichr, range(128))) +for encoding in ( + 'ascii', + ): + try: + assert unicode(u.encode(encoding),encoding) == u + except AssertionError: + print '*** codec "%s" failed round-trip' % encoding + except ValueError,why: + print '*** codec for "%s" failed: %s' % (encoding, why) + +print 'done.' + +print 'Testing standard mapping codecs...', + +print '0-127...', +s = ''.join(map(chr, range(128))) +for encoding in ( + 'cp037', 'cp1026', + 'cp437', 'cp500', 'cp737', 'cp775', 'cp850', + 'cp852', 'cp855', 'cp860', 'cp861', 'cp862', + 'cp863', 'cp865', 'cp866', + 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15', + 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6', + 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1', + 'mac_cyrillic', 'mac_latin2', + + 'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', + 'cp1256', 'cp1257', 'cp1258', + 'cp856', 'cp857', 'cp864', 'cp869', 'cp874', + + 'mac_greek', 'mac_iceland','mac_roman', 'mac_turkish', + 'cp1006', 'cp875', 'iso8859_8', + + ### These have undefined mappings: + #'cp424', + + ): + try: + assert unicode(s,encoding).encode(encoding) == s + except AssertionError: + print '*** codec "%s" failed round-trip' % encoding + except ValueError,why: + print '*** codec for "%s" failed: %s' % (encoding, why) + +print '128-255...', +s = ''.join(map(chr, range(128,256))) +for encoding in ( + 'cp037', 'cp1026', + 'cp437', 'cp500', 'cp737', 'cp775', 'cp850', + 'cp852', 'cp855', 'cp860', 'cp861', 'cp862', + 'cp863', 'cp865', 'cp866', + 'iso8859_10', 'iso8859_13', 'iso8859_14', 'iso8859_15', + 'iso8859_2', 'iso8859_3', 'iso8859_4', 'iso8859_5', 'iso8859_6', + 'iso8859_7', 'iso8859_9', 'koi8_r', 'latin_1', + 'mac_cyrillic', 'mac_latin2', + + ### These have undefined mappings: + #'cp1250', 'cp1251', 'cp1252', 'cp1253', 'cp1254', 'cp1255', + #'cp1256', 'cp1257', 'cp1258', + #'cp424', 'cp856', 'cp857', 'cp864', 'cp869', 'cp874', + #'mac_greek', 'mac_iceland','mac_roman', 
'mac_turkish', + + ### These fail the round-trip: + #'cp1006', 'cp875', 'iso8859_8', + + ): + try: + assert unicode(s,encoding).encode(encoding) == s + except AssertionError: + print '*** codec "%s" failed round-trip' % encoding + except ValueError,why: + print '*** codec for "%s" failed: %s' % (encoding, why) + +print 'done.' + +print 'Testing Unicode string concatenation...', +assert (u"abc" u"def") == u"abcdef" +assert ("abc" u"def") == u"abcdef" +assert (u"abc" "def") == u"abcdef" +assert (u"abc" u"def" "ghi") == u"abcdefghi" +assert ("abc" "def" u"ghi") == u"abcdefghi" +print 'done.' diff --git a/Lib/dos-8x3/test_win.py b/Lib/dos-8x3/test_win.py new file mode 100644 index 0000000..1af095a --- /dev/null +++ b/Lib/dos-8x3/test_win.py @@ -0,0 +1,147 @@ +# Test the Windows-specific winreg module. +# Only winreg functions not hit here: FlushKey, LoadKey and SaveKey + +from winreg import * +import os, sys + +test_key_name = "SOFTWARE\\Python Registry Test Key - Delete Me" + +test_data = [ + ("Int Value", 45, REG_DWORD), + ("String Val", "A string value", REG_SZ,), + (u"Unicode Val", u"A Unicode value", REG_SZ,), + ("StringExpand", "The path is %path%", REG_EXPAND_SZ), + ("UnicodeExpand", u"The path is %path%", REG_EXPAND_SZ), + ("Multi-string", ["Lots", "of", "string", "values"], REG_MULTI_SZ), + ("Multi-unicode", [u"Lots", u"of", u"unicode", u"values"], REG_MULTI_SZ), + ("Multi-mixed", [u"Unicode", u"and", "string", "values"],REG_MULTI_SZ), + ("Raw Data", ("binary"+chr(0)+"data"), REG_BINARY), +] + +def WriteTestData(root_key): + # Set the default value for this key. + SetValue(root_key, test_key_name, REG_SZ, "Default value") + key = CreateKey(root_key, test_key_name) + # Create a sub-key + sub_key = CreateKey(key, "sub_key") + # Give the sub-key some named values + + for value_name, value_data, value_type in test_data: + SetValueEx(sub_key, value_name, 0, value_type, value_data) + + # Check we wrote as many items as we thought. 
+ nkeys, nvalues, since_mod = QueryInfoKey(key) + assert nkeys==1, "Not the correct number of sub keys" + assert nvalues==1, "Not the correct number of values" + nkeys, nvalues, since_mod = QueryInfoKey(sub_key) + assert nkeys==0, "Not the correct number of sub keys" + assert nvalues==len(test_data), "Not the correct number of values" + # Close this key this way... + # (but before we do, copy the key as an integer - this allows + # us to test that the key really gets closed). + int_sub_key = int(sub_key) + CloseKey(sub_key) + try: + QueryInfoKey(int_sub_key) + raise RuntimeError, "It appears the CloseKey() function does not close the actual key!" + except EnvironmentError: + pass + # ... and close that key that way :-) + int_key = int(key) + key.Close() + try: + QueryInfoKey(int_key) + raise RuntimeError, "It appears the key.Close() function does not close the actual key!" + except EnvironmentError: + pass + +def ReadTestData(root_key): + # Check we can get default value for this key. + val = QueryValue(root_key, test_key_name) + assert val=="Default value", "Registry didnt give back the correct value" + + key = OpenKey(root_key, test_key_name) + # Read the sub-keys + sub_key = OpenKey(key, "sub_key") + # Check I can enumerate over the values. + index = 0 + while 1: + try: + data = EnumValue(sub_key, index) + except EnvironmentError: + break + assert data in test_data, "didnt read back the correct test data." + index = index + 1 + assert index==len(test_data), "Didnt read the correct number of items" + # Check I can directly access each item + for value_name, value_data, value_type in test_data: + read_val, read_typ = QueryValueEx(sub_key, value_name) + assert read_val==value_data and read_typ == value_type, \ + "Could not directly read the value" + sub_key.Close() + # Enumerate our main key. 
+ read_val = EnumKey(key, 0) + assert read_val == "sub_key", "Read subkey value wrong" + try: + EnumKey(key, 1) + assert 0, "Was able to get a second key when I only have one!" + except EnvironmentError: + pass + + key.Close() + +def DeleteTestData(root_key): + key = OpenKey(root_key, test_key_name, 0, KEY_ALL_ACCESS) + sub_key = OpenKey(key, "sub_key", 0, KEY_ALL_ACCESS) + # It is not necessary to delete the values before deleting + # the key (although subkeys must not exist). We delete them + # manually just to prove we can :-) + for value_name, value_data, value_type in test_data: + DeleteValue(sub_key, value_name) + + nkeys, nvalues, since_mod = QueryInfoKey(sub_key) + assert nkeys==0 and nvalues==0, "subkey not empty before delete" + sub_key.Close() + DeleteKey(key, "sub_key") + + try: + # Shouldnt be able to delete it twice! + DeleteKey(key, "sub_key") + assert 0, "Deleting the key twice succeeded" + except EnvironmentError: + pass + key.Close() + DeleteKey(root_key, test_key_name) + # Opening should now fail! + try: + key = OpenKey(root_key, test_key_name) + assert 0, "Could open the non-existant key" + except WindowsError: # Use this error name this time + pass + +def TestAll(root_key): + WriteTestData(root_key) + ReadTestData(root_key) + DeleteTestData(root_key) + +# Test on my local machine. 
+TestAll(HKEY_CURRENT_USER) +print "Local registry tests worked" +try: + remote_name = sys.argv[sys.argv.index("--remote")+1] +except (IndexError, ValueError): + remote_name = None + +if remote_name is not None: + try: + remote_key = ConnectRegistry(remote_name, HKEY_CURRENT_USER) + except EnvironmentError, exc: + print "Could not connect to the remote machine -", exc.strerror + remote_key = None + if remote_key is not None: + TestAll(remote_key) + print "Remote registry tests worked" +else: + print "Remote registry calls can be tested using", + print "'test_winreg.py --remote \\\\machine_name'" + diff --git a/Lib/dos-8x3/test_zip.py b/Lib/dos-8x3/test_zip.py new file mode 100644 index 0000000..3ffdd19 --- /dev/null +++ b/Lib/dos-8x3/test_zip.py @@ -0,0 +1,26 @@ +import zipfile, os + +srcname = "junk9630.tmp" +zipname = "junk9708.tmp" + +try: + fp = open(srcname, "w") # Make a source file with some lines + for i in range(0, 1000): + fp.write("Test of zipfile line %d.\n" % i) + fp.close() + + zip = zipfile.ZipFile(zipname, "w") # Create the ZIP archive + zip.write(srcname, srcname) + zip.write(srcname, "another.name") + zip.close() + + zip = zipfile.ZipFile(zipname, "r") # Read the ZIP archive + zip.read("another.name") + zip.read(srcname) + zip.close() +finally: + if os.path.isfile(srcname): # Remove temporary files + os.unlink(srcname) + if os.path.isfile(zipname): + os.unlink(zipname) + diff --git a/Lib/dos-8x3/test_zli.py b/Lib/dos-8x3/test_zli.py index 719b0e7..ccfbc7d 100644 --- a/Lib/dos-8x3/test_zli.py +++ b/Lib/dos-8x3/test_zli.py @@ -80,14 +80,14 @@ else: # in order to provide more variations. 
for sync in [zlib.Z_NO_FLUSH, zlib.Z_SYNC_FLUSH, zlib.Z_FULL_FLUSH]: for level in range(10): - obj = zlib.compressobj( level ) - d = obj.compress( buf[:3000] ) - d = d + obj.flush( sync ) - d = d + obj.compress( buf[3000:] ) - d = d + obj.flush() - if zlib.decompress(d) != buf: - print "Decompress failed: flush mode=%i, level=%i" % (sync,level) - del obj + obj = zlib.compressobj( level ) + d = obj.compress( buf[:3000] ) + d = d + obj.flush( sync ) + d = d + obj.compress( buf[3000:] ) + d = d + obj.flush() + if zlib.decompress(d) != buf: + print "Decompress failed: flush mode=%i, level=%i" % (sync,level) + del obj def ignore(): """An empty function with a big string. diff --git a/Lib/dos-8x3/threadin.py b/Lib/dos-8x3/threadin.py index 767df45..ded573a 100644 --- a/Lib/dos-8x3/threadin.py +++ b/Lib/dos-8x3/threadin.py @@ -1,5 +1,4 @@ -# threading.py: -# Proposed new threading module, emulating a subset of Java's threading model +"""Proposed new threading module, emulating a subset of Java's threading model.""" import sys import time @@ -238,7 +237,7 @@ def Semaphore(*args, **kwargs): class _Semaphore(_Verbose): - # After Tim Peters' semaphore class, but bnot quite the same (no maximum) + # After Tim Peters' semaphore class, but not quite the same (no maximum) def __init__(self, value=1, verbose=None): assert value >= 0, "Semaphore initial value must be >= 0" @@ -506,7 +505,7 @@ class _DummyThread(Thread): def __init__(self): Thread.__init__(self, name=_newname("Dummy-%d")) - self.__Thread_started = 1 + self._Thread__started = 1 _active_limbo_lock.acquire() _active[_get_ident()] = self _active_limbo_lock.release() diff --git a/Lib/dos-8x3/threadst.py b/Lib/dos-8x3/threadst.py new file mode 100644 index 0000000..c2b08f2 --- /dev/null +++ b/Lib/dos-8x3/threadst.py @@ -0,0 +1,9 @@ +import thread +# Start empty thread to initialise thread mechanics (and global lock!) 
+# This thread finishes immediately, so by itself it has little influence on +# the test results; its only effect is that it initialises the global lock +thread.start_new_thread(lambda : 1, ()) + +import test.pystone +test.pystone.main() + diff --git a/Lib/dos-8x3/tracebac.py b/Lib/dos-8x3/tracebac.py index 70d3230..4675077 100755 --- a/Lib/dos-8x3/tracebac.py +++ b/Lib/dos-8x3/tracebac.py @@ -1,4 +1,4 @@ -# Format and print Python stack traces +"""Extract, format and print information about Python stack traces.""" import linecache import string @@ -10,6 +10,8 @@ def _print(file, str='', terminator='\n'): def print_list(extracted_list, file=None): + """Print the list of tuples as returned by extract_tb() or + extract_stack() as a formatted stack trace to the given file.""" if not file: file = sys.stderr for filename, lineno, name, line in extracted_list: @@ -19,6 +21,12 @@ def print_list(extracted_list, file=None): _print(file, ' %s' % string.strip(line)) def format_list(extracted_list): + """Given a list of tuples as returned by extract_tb() or + extract_stack(), return a list of strings ready for printing. + Each string in the resulting list corresponds to the item with + the same index in the argument list. Each string ends in a + newline; the strings may contain internal newlines as well, for + those items whose source text line is not None.""" list = [] for filename, lineno, name, line in extracted_list: item = ' File "%s", line %d, in %s\n' % (filename,lineno,name) @@ -29,6 +37,10 @@ def format_list(extracted_list): def print_tb(tb, limit=None, file=None): + """Print up to 'limit' stack trace entries from the traceback 'tb'. + If 'limit' is omitted or None, all entries are printed. 
If 'file' is + omitted or None, the output goes to sys.stderr; otherwise 'file' + should be an open file or file-like object with a write() method.""" if not file: file = sys.stderr if limit is None: @@ -49,9 +61,18 @@ def print_tb(tb, limit=None, file=None): n = n+1 def format_tb(tb, limit = None): + """A shorthand for 'format_list(extract_tb(tb, limit))'.""" return format_list(extract_tb(tb, limit)) def extract_tb(tb, limit = None): + """Return a list of up to 'limit' pre-processed stack trace entries + extracted from the traceback object 'tb'. This is useful for + alternate formatting of stack traces. If 'limit' is omitted or None, + all entries are extracted. A pre-processed stack trace entry is a + quadruple (filename, line number, function name, text) representing + the information that is usually printed for a stack trace. The text + is a string with leading and trailing whitespace stripped; if the + source is not available it is None.""" if limit is None: if hasattr(sys, 'tracebacklimit'): limit = sys.tracebacklimit @@ -73,10 +94,18 @@ def extract_tb(tb, limit = None): def print_exception(etype, value, tb, limit=None, file=None): + """Print exception information and up to 'limit' stack trace entries + from the traceback 'tb' to 'file'. 
This differs from print_tb() in + the following ways: (1) if tb is not None, it prints a header + "Traceback (most recent call last):"; (2) it prints the exception type and + value after the stack trace; (3) if etype is SyntaxError and value has + the appropriate format, it prints the line where the syntax error + occurred with a caret on the next line indicating the approximate + position of the error.""" if not file: file = sys.stderr if tb: - _print(file, 'Traceback (innermost last):') + _print(file, 'Traceback (most recent call last):') print_tb(tb, limit, file) lines = format_exception_only(etype, value) for line in lines[:-1]: @@ -84,8 +113,14 @@ def print_exception(etype, value, tb, limit=None, file=None): _print(file, lines[-1], '') def format_exception(etype, value, tb, limit = None): + """Format a stack trace and the exception information. The arguments + have the same meaning as the corresponding arguments to + print_exception(). The return value is a list of strings, each + ending in a newline and some containing internal newlines. When + these lines are concatenated and printed, exactly the same text is + printed as does print_exception().""" if tb: - list = ['Traceback (innermost last):\n'] + list = ['Traceback (most recent call last):\n'] list = list + format_tb(tb, limit) else: list = [] @@ -93,6 +128,14 @@ def format_exception(etype, value, tb, limit = None): return list def format_exception_only(etype, value): + """Format the exception part of a traceback. The arguments are the + exception type and value such as given by sys.last_type and + sys.last_value. The return value is a list of strings, each ending + in a newline. Normally, the list contains a single string; + however, for SyntaxError exceptions, it contains several lines that + (when printed) display detailed information about where the syntax + error occurred. 
The message indicating which exception occurred is + always the last string in the list.""" list = [] if type(etype) == types.ClassType: stype = etype.__name__ @@ -128,6 +171,10 @@ def format_exception_only(etype, value): def print_exc(limit=None, file=None): + """This is a shorthand for 'print_exception(sys.exc_type, + sys.exc_value, sys.exc_traceback, limit, file)'. + (In fact, it uses sys.exc_info() to retrieve the same information + in a thread-safe way.)""" if not file: file = sys.stderr try: @@ -137,6 +184,8 @@ def print_exc(limit=None, file=None): etype = value = tb = None def print_last(limit=None, file=None): + """This is a shorthand for 'print_exception(sys.last_type, + sys.last_value, sys.last_traceback, limit, file)'.""" if not file: file = sys.stderr print_exception(sys.last_type, sys.last_value, sys.last_traceback, @@ -144,6 +193,10 @@ def print_last(limit=None, file=None): def print_stack(f=None, limit=None, file=None): + """This function prints a stack trace from its invocation point. + The optional 'f' argument can be used to specify an alternate stack + frame at which to start. The optional 'limit' and 'file' arguments + have the same meaning as for print_exception().""" if f is None: try: raise ZeroDivisionError @@ -152,6 +205,7 @@ def print_stack(f=None, limit=None, file=None): print_list(extract_stack(f, limit), file) def format_stack(f=None, limit=None): + """A shorthand for 'format_list(extract_stack(f, limit))'.""" if f is None: try: raise ZeroDivisionError @@ -160,6 +214,12 @@ def format_stack(f=None, limit=None): return format_list(extract_stack(f, limit)) def extract_stack(f=None, limit = None): + """Extract the raw traceback from the current stack frame. The + return value has the same format as for extract_tb(). The optional + 'f' and 'limit' arguments have the same meaning as for print_stack().
+ Each item in the list is a quadruple (filename, line number, + function name, text), and the entries are in order from oldest + to newest stack frame.""" if f is None: try: raise ZeroDivisionError @@ -184,13 +244,14 @@ def extract_stack(f=None, limit = None): list.reverse() return list -# Calculate the correct line number of the traceback given in tb (even -# with -O on). -# Coded by Marc-Andre Lemburg from the example of PyCode_Addr2Line() -# in compile.c. -# Revised version by Jim Hugunin to work with JPython too. - def tb_lineno(tb): + """Calculate the correct line number of the traceback given in tb + (even with -O on).""" + + # Coded by Marc-Andre Lemburg from the example of PyCode_Addr2Line() + # in compile.c. + # Revised version by Jim Hugunin to work with JPython too. + c = tb.tb_frame.f_code if not hasattr(c, 'co_lnotab'): return tb.tb_lineno diff --git a/Lib/dos-8x3/userdict.py b/Lib/dos-8x3/userdict.py index 50fee89..3c48415 100755 --- a/Lib/dos-8x3/userdict.py +++ b/Lib/dos-8x3/userdict.py @@ -1,4 +1,4 @@ -# A more or less complete user-defined wrapper around dictionary objects +"""A more or less complete user-defined wrapper around dictionary objects.""" class UserDict: def __init__(self, dict=None): diff --git a/Lib/dos-8x3/userlist.py b/Lib/dos-8x3/userlist.py index a60d8ce..7bd0298 100755 --- a/Lib/dos-8x3/userlist.py +++ b/Lib/dos-8x3/userlist.py @@ -1,13 +1,16 @@ -# A more or less complete user-defined wrapper around list objects +"""A more or less complete user-defined wrapper around list objects.""" class UserList: - def __init__(self, list=None): + def __init__(self, initlist=None): self.data = [] - if list is not None: - if type(list) == type(self.data): - self.data[:] = list + if initlist is not None: + # XXX should this accept an arbitrary sequence?
+ if type(initlist) == type(self.data): + self.data[:] = initlist + elif isinstance(initlist, UserList): + self.data[:] = initlist.data[:] else: - self.data[:] = list.data[:] + self.data = list(initlist) def __repr__(self): return repr(self.data) def __cmp__(self, other): if isinstance(other, UserList): diff --git a/Lib/dos-8x3/userstri.py b/Lib/dos-8x3/userstri.py new file mode 100644 index 0000000..528065e --- /dev/null +++ b/Lib/dos-8x3/userstri.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python +## vim:ts=4:et:nowrap +"""A user-defined wrapper around string objects + +Note: string objects have grown methods in Python 1.6 +This module requires Python 1.6 or later. +""" +from types import StringType, UnicodeType +import sys + +class UserString: + def __init__(self, seq): + if isinstance(seq, StringType) or isinstance(seq, UnicodeType): + self.data = seq + elif isinstance(seq, UserString): + self.data = seq.data[:] + else: + self.data = str(seq) + def __str__(self): return str(self.data) + def __repr__(self): return repr(self.data) + def __int__(self): return int(self.data) + def __long__(self): return long(self.data) + def __float__(self): return float(self.data) + def __complex__(self): return complex(self.data) + def __hash__(self): return hash(self.data) + + def __cmp__(self, string): + if isinstance(string, UserString): + return cmp(self.data, string.data) + else: + return cmp(self.data, string) + def __contains__(self, char): + return char in self.data + + def __len__(self): return len(self.data) + def __getitem__(self, index): return self.__class__(self.data[index]) + def __getslice__(self, start, end): + start = max(start, 0); end = max(end, 0) + return self.__class__(self.data[start:end]) + + def __add__(self, other): + if isinstance(other, UserString): + return self.__class__(self.data + other.data) + elif isinstance(other, StringType) or isinstance(other, UnicodeType): + return self.__class__(self.data + other) + else: + return self.__class__(self.data + 
str(other)) + def __radd__(self, other): + if isinstance(other, StringType) or isinstance(other, UnicodeType): + return self.__class__(other + self.data) + else: + return self.__class__(str(other) + self.data) + def __mul__(self, n): + return self.__class__(self.data*n) + __rmul__ = __mul__ + + # the following methods are defined in alphabetical order: + def capitalize(self): return self.__class__(self.data.capitalize()) + def center(self, width): return self.__class__(self.data.center(width)) + def count(self, sub, start=0, end=sys.maxint): + return self.data.count(sub, start, end) + def encode(self, encoding=None, errors=None): # XXX improve this? + if encoding: + if errors: + return self.__class__(self.data.encode(encoding, errors)) + else: + return self.__class__(self.data.encode(encoding)) + else: + return self.__class__(self.data.encode()) + def endswith(self, suffix, start=0, end=sys.maxint): + return self.data.endswith(suffix, start, end) + def expandtabs(self, tabsize=8): + return self.__class__(self.data.expandtabs(tabsize)) + def find(self, sub, start=0, end=sys.maxint): + return self.data.find(sub, start, end) + def index(self, sub, start=0, end=sys.maxint): + return self.data.index(sub, start, end) + def isdecimal(self): return self.data.isdecimal() + def isdigit(self): return self.data.isdigit() + def islower(self): return self.data.islower() + def isnumeric(self): return self.data.isnumeric() + def isspace(self): return self.data.isspace() + def istitle(self): return self.data.istitle() + def isupper(self): return self.data.isupper() + def join(self, seq): return self.data.join(seq) + def ljust(self, width): return self.__class__(self.data.ljust(width)) + def lower(self): return self.__class__(self.data.lower()) + def lstrip(self): return self.__class__(self.data.lstrip()) + def replace(self, old, new, maxsplit=-1): + return self.__class__(self.data.replace(old, new, maxsplit)) + def rfind(self, sub, start=0, end=sys.maxint): + return 
self.data.rfind(sub, start, end) + def rindex(self, sub, start=0, end=sys.maxint): + return self.data.rindex(sub, start, end) + def rjust(self, width): return self.__class__(self.data.rjust(width)) + def rstrip(self): return self.__class__(self.data.rstrip()) + def split(self, sep=None, maxsplit=-1): + return self.data.split(sep, maxsplit) + def splitlines(self, keepends=0): return self.data.splitlines(keepends) + def startswith(self, prefix, start=0, end=sys.maxint): + return self.data.startswith(prefix, start, end) + def strip(self): return self.__class__(self.data.strip()) + def swapcase(self): return self.__class__(self.data.swapcase()) + def title(self): return self.__class__(self.data.title()) + def translate(self, table, deletechars=""): + return self.__class__(self.data.translate(table, deletechars)) + def upper(self): return self.__class__(self.data.upper()) + +class MutableString(UserString): + """mutable string objects + + Python strings are immutable objects. This has the advantage that + strings may be used as dictionary keys. If this property isn't needed + and you insist on changing string values in place instead, you may cheat + and use MutableString. + + But the purpose of this class is an educational one: to prevent + people from inventing their own mutable string class derived + from UserString and then forgetting to remove (override) the + __hash__ method inherited from UserString. This would lead to + errors that would be very hard to track down.
+ + A faster and better solution is to rewrite your program using lists.""" + def __init__(self, string=""): + self.data = string + def __hash__(self): + raise TypeError, "unhashable type (it is mutable)" + def __setitem__(self, index, sub): + if index < 0 or index >= len(self.data): raise IndexError + self.data = self.data[:index] + sub + self.data[index+1:] + def __delitem__(self, index): + if index < 0 or index >= len(self.data): raise IndexError + self.data = self.data[:index] + self.data[index+1:] + def __setslice__(self, start, end, sub): + start = max(start, 0); end = max(end, 0) + if isinstance(sub, UserString): + self.data = self.data[:start]+sub.data+self.data[end:] + elif isinstance(sub, StringType) or isinstance(sub, UnicodeType): + self.data = self.data[:start]+sub+self.data[end:] + else: + self.data = self.data[:start]+str(sub)+self.data[end:] + def __delslice__(self, start, end): + start = max(start, 0); end = max(end, 0) + self.data = self.data[:start] + self.data[end:] + def immutable(self): + return UserString(self.data) + +if __name__ == "__main__": + # execute the regression test to stdout, if called as a script: + import os + called_in_dir, called_as = os.path.split(sys.argv[0]) + called_in_dir = os.path.abspath(called_in_dir) + called_as, py = os.path.splitext(called_as) + sys.path.append(os.path.join(called_in_dir, 'test')) + if '-q' in sys.argv: + import test_support + test_support.verbose = 0 + __import__('test_' + called_as.lower()) |