diff options
author | Antoine Pitrou <solipsis@pitrou.net> | 2010-09-29 15:03:40 (GMT) |
---|---|---|
committer | Antoine Pitrou <solipsis@pitrou.net> | 2010-09-29 15:03:40 (GMT) |
commit | 69ab95105f5105b8337c757346c899d6c7e7d9bb (patch) | |
tree | ae499f9e4041d0d244368932a1d4b4c0b6cd7a49 /Lib/nntplib.py | |
parent | 926f0da582a01f5c03bcc05919f5dbb6da37c01a (diff) | |
download | cpython-69ab95105f5105b8337c757346c899d6c7e7d9bb.zip cpython-69ab95105f5105b8337c757346c899d6c7e7d9bb.tar.gz cpython-69ab95105f5105b8337c757346c899d6c7e7d9bb.tar.bz2 |
Issue #9360: Cleanup and improvements to the nntplib module. The API
now conforms to the philosophy of bytes and unicode separation in Python 3.
A test suite has also been added.
Diffstat (limited to 'Lib/nntplib.py')
-rw-r--r-- | Lib/nntplib.py | 946 |
1 files changed, 656 insertions, 290 deletions
diff --git a/Lib/nntplib.py b/Lib/nntplib.py index 6ac2fcb..1055d06 100644 --- a/Lib/nntplib.py +++ b/Lib/nntplib.py @@ -1,4 +1,7 @@ -"""An NNTP client class based on RFC 977: Network News Transfer Protocol. +"""An NNTP client class based on: +- RFC 977: Network News Transfer Protocol +- RFC 2980: Common NNTP Extensions +- RFC 3977: Network News Transfer Protocol (version 2) Example: @@ -27,15 +30,53 @@ are strings, not numbers, since they are rarely used for calculations. # RFC 977 by Brian Kantor and Phil Lapsley. # xover, xgtitle, xpath, date methods by Kevan Heydon +# Incompatible changes from the 2.x nntplib: +# - all commands are encoded as UTF-8 data (using the "surrogateescape" +# error handler), except for raw message data (POST, IHAVE) +# - all responses are decoded as UTF-8 data (using the "surrogateescape" +# error handler), except for raw message data (ARTICLE, HEAD, BODY) +# - the `file` argument to various methods is keyword-only +# +# - NNTP.date() returns a datetime object +# - NNTP.newgroups() and NNTP.newnews() take a datetime (or date) object, +# rather than a pair of (date, time) strings. +# - NNTP.newgroups() and NNTP.list() return a list of GroupInfo named tuples +# - NNTP.descriptions() returns a dict mapping group names to descriptions +# - NNTP.xover() returns a list of dicts mapping field names (header or metadata) +# to field values; each dict representing a message overview. +# - NNTP.article(), NNTP.head() and NNTP.body() return a (response, ArticleInfo) +# tuple. +# - the "internal" methods have been marked private (they now start with +# an underscore) + +# Other changes from the 2.x/3.1 nntplib: +# - automatic querying of capabilities at connect +# - New method NNTP.getcapabilities() +# - New method NNTP.over() +# - New helper function decode_header() +# - NNTP.post() and NNTP.ihave() accept file objects, bytes-like objects and +# arbitrary iterables yielding lines. +# - An extensive test suite :-) + +# TODO: +# - return structured data (GroupInfo etc.) everywhere +# - support HDR # Imports import re import socket +import collections +import datetime +import warnings -__all__ = ["NNTP","NNTPReplyError","NNTPTemporaryError", - "NNTPPermanentError","NNTPProtocolError","NNTPDataError", - "error_reply","error_temp","error_perm","error_proto", - "error_data",] +from email.header import decode_header as _email_decode_header +from socket import _GLOBAL_DEFAULT_TIMEOUT + +__all__ = ["NNTP", + "NNTPReplyError", "NNTPTemporaryError", "NNTPPermanentError", + "NNTPProtocolError", "NNTPDataError", + "decode_header", + ] # Exceptions raised when an error or invalid response is received class NNTPError(Exception): @@ -67,39 +108,189 @@ class NNTPDataError(NNTPError): """Error in response data""" pass -# for backwards compatibility -error_reply = NNTPReplyError -error_temp = NNTPTemporaryError -error_perm = NNTPPermanentError -error_proto = NNTPProtocolError -error_data = NNTPDataError - - # Standard port used by NNTP servers NNTP_PORT = 119 # Response numbers that are followed by additional text (e.g. article) -LONGRESP = [b'100', b'215', b'220', b'221', b'222', b'224', b'230', b'231', b'282'] - +_LONGRESP = { + '100', # HELP + '101', # CAPABILITIES + '211', # LISTGROUP (also not multi-line with GROUP) + '215', # LIST + '220', # ARTICLE + '221', # HEAD, XHDR + '222', # BODY + '224', # OVER, XOVER + '225', # HDR + '230', # NEWNEWS + '231', # NEWGROUPS + '282', # XGTITLE +} + +# Default decoded value for LIST OVERVIEW.FMT if not supported +_DEFAULT_OVERVIEW_FMT = [ + "subject", "from", "date", "message-id", "references", ":bytes", ":lines"] + +# Alternative names allowed in LIST OVERVIEW.FMT response +_OVERVIEW_FMT_ALTERNATIVES = { + 'bytes': ':bytes', + 'lines': ':lines', +} # Line terminators (we always output CRLF, but accept any of CRLF, CR, LF) -CRLF = b'\r\n' +_CRLF = b'\r\n' +GroupInfo = collections.namedtuple('GroupInfo', + ['group', 'last', 'first', 'flag']) +ArticleInfo = collections.namedtuple('ArticleInfo', + ['number', 'message_id', 'lines']) -# The class itself -class NNTP: - def __init__(self, host, port=NNTP_PORT, user=None, password=None, - readermode=None, usenetrc=True): + +# Helper function(s) +def decode_header(header_str): + """Takes an unicode string representing a munged header value + and decodes it as a (possibly non-ASCII) readable value.""" + parts = [] + for v, enc in _email_decode_header(header_str): + if isinstance(v, bytes): + parts.append(v.decode(enc or 'ascii')) + else: + parts.append(v) + return ' '.join(parts) + +def _parse_overview_fmt(lines): + """Parse a list of string representing the response to LIST OVERVIEW.FMT + and return a list of header/metadata names. + Raises NNTPDataError if the response is not compliant + (cf. RFC 3977, section 8.4).""" + fmt = [] + for line in lines: + if line[0] == ':': + # Metadata name (e.g. ":bytes") + name, _, suffix = line[1:].partition(':') + name = ':' + name + else: + # Header name (e.g. "Subject:" or "Xref:full") + name, _, suffix = line.partition(':') + name = name.lower() + name = _OVERVIEW_FMT_ALTERNATIVES.get(name, name) + # Should we do something with the suffix? + fmt.append(name) + defaults = _DEFAULT_OVERVIEW_FMT + if len(fmt) < len(defaults): + raise NNTPDataError("LIST OVERVIEW.FMT response too short") + if fmt[:len(defaults)] != defaults: + raise NNTPDataError("LIST OVERVIEW.FMT redefines default fields") + return fmt + +def _parse_overview(lines, fmt, data_process_func=None): + """Parse the response to a OVER or XOVER command according to the + overview format `fmt`.""" + n_defaults = len(_DEFAULT_OVERVIEW_FMT) + overview = [] + for line in lines: + fields = {} + article_number, *tokens = line.split('\t') + article_number = int(article_number) + for i, token in enumerate(tokens): + if i >= len(fmt): + # XXX should we raise an error? Some servers might not + # support LIST OVERVIEW.FMT and still return additional + # headers. + continue + field_name = fmt[i] + is_metadata = field_name.startswith(':') + if i >= n_defaults and not is_metadata: + # Non-default header names are included in full in the response + h = field_name + ":" + if token[:len(h)].lower() != h: + raise NNTPDataError("OVER/XOVER response doesn't include " + "names of additional headers") + token = token[len(h):].lstrip(" ") + fields[fmt[i]] = token + overview.append((article_number, fields)) + return overview + +def _parse_datetime(date_str, time_str=None): + """Parse a pair of (date, time) strings, and return a datetime object. + If only the date is given, it is assumed to be date and time + concatenated together (e.g. response to the DATE command). + """ + if time_str is None: + time_str = date_str[-6:] + date_str = date_str[:-6] + hours = int(time_str[:2]) + minutes = int(time_str[2:4]) + seconds = int(time_str[4:]) + year = int(date_str[:-4]) + month = int(date_str[-4:-2]) + day = int(date_str[-2:]) + # RFC 3977 doesn't say how to interpret 2-char years. Assume that + # there are no dates before 1970 on Usenet. + if year < 70: + year += 2000 + elif year < 100: + year += 1900 + return datetime.datetime(year, month, day, hours, minutes, seconds) + +def _unparse_datetime(dt, legacy=False): + """Format a date or datetime object as a pair of (date, time) strings + in the format required by the NEWNEWS and NEWGROUPS commands. If a + date object is passed, the time is assumed to be midnight (00h00). + + The returned representation depends on the legacy flag: + * if legacy is False (the default): + date has the YYYYMMDD format and time the HHMMSS format + * if legacy is True: + date has the YYMMDD format and time the HHMMSS format. + RFC 3977 compliant servers should understand both formats; therefore, + legacy is only needed when talking to old servers. + """ + if not isinstance(dt, datetime.datetime): + time_str = "000000" + else: + time_str = "{0.hour:02d}{0.minute:02d}{0.second:02d}".format(dt) + y = dt.year + if legacy: + y = y % 100 + date_str = "{0:02d}{1.month:02d}{1.day:02d}".format(y, dt) + else: + date_str = "{0:04d}{1.month:02d}{1.day:02d}".format(y, dt) + return date_str, time_str + + +# The classes themselves +class _NNTPBase: + # UTF-8 is the character set for all NNTP commands and responses: they + # are automatically encoded (when sending) and decoded (and receiving) + # by this class. + # However, some multi-line data blocks can contain arbitrary bytes (for + # example, latin-1 or utf-16 data in the body of a message). Commands + # taking (POST, IHAVE) or returning (HEAD, BODY, ARTICLE) raw message + # data will therefore only accept and produce bytes objects. + # Furthermore, since there could be non-compliant servers out there, + # we use 'surrogateescape' as the error handler for fault tolerance + # and easy round-tripping. This could be useful for some applications + # (e.g. NNTP gateways). + + encoding = 'utf-8' + errors = 'surrogateescape' + + def __init__(self, file, user=None, password=None, + readermode=None, usenetrc=True, + timeout=_GLOBAL_DEFAULT_TIMEOUT): """Initialize an instance. Arguments: - - host: hostname to connect to - - port: port to connect to (default the standard NNTP port) + - file: file-like object (open for read/write in binary mode) - user: username to authenticate with - password: password to use with username - readermode: if true, send 'mode reader' command after connecting. + - usenetrc: allow loading username and password from ~/.netrc file + if not specified explicitly + - timeout: timeout (in seconds) used for socket connections readermode is sometimes necessary if you are connecting to an NNTP server on the local machine and intend to call @@ -107,12 +298,9 @@ class NNTP: unexpected NNTPPermanentErrors, you might need to set readermode. """ - self.host = host - self.port = port - self.sock = socket.create_connection((host, port)) - self.file = self.sock.makefile('rb') + self.file = file self.debugging = 0 - self.welcome = self.getresp() + self.welcome = self._getresp() # 'mode reader' is sometimes necessary to enable 'reader' mode. # However, the order in which 'mode reader' and 'authinfo' need to @@ -122,12 +310,12 @@ class NNTP: readermode_afterauth = 0 if readermode: try: - self.welcome = self.shortcmd('mode reader') + self.welcome = self._shortcmd('mode reader') except NNTPPermanentError: # error 500, probably 'not implemented' pass except NNTPTemporaryError as e: - if user and e.response.startswith(b'480'): + if user and e.response.startswith('480'): # Need authorization before 'mode reader' readermode_afterauth = 1 else: @@ -144,29 +332,35 @@ class NNTP: password = auth[2] except IOError: pass - # Perform NNRP authentication if needed. + # Perform NNTP authentication if needed. if user: - resp = self.shortcmd('authinfo user '+user) - if resp.startswith(b'381'): + resp = self._shortcmd('authinfo user '+user) + if resp.startswith('381'): if not password: raise NNTPReplyError(resp) else: - resp = self.shortcmd( + resp = self._shortcmd( 'authinfo pass '+password) - if not resp.startswith(b'281'): + if not resp.startswith('281'): raise NNTPPermanentError(resp) if readermode_afterauth: try: - self.welcome = self.shortcmd('mode reader') + self.welcome = self._shortcmd('mode reader') except NNTPPermanentError: # error 500, probably 'not implemented' pass - - # Get the welcome message from the server - # (this is read and squirreled away by __init__()). - # If the response code is 200, posting is allowed; - # if it 201, posting is not allowed + # Inquire about capabilities (RFC 3977) + self.nntp_version = 1 + try: + resp, caps = self.capabilities() + except NNTPPermanentError: + # Server doesn't support capabilities + self._caps = {} + else: + self._caps = caps + if 'VERSION' in caps: + self.nntp_version = int(caps['VERSION'][0]) def getwelcome(self): """Get the welcome message from the server @@ -177,6 +371,12 @@ class NNTP: if self.debugging: print('*welcome*', repr(self.welcome)) return self.welcome + def getcapabilities(self): + """Get the server capabilities, as read by __init__(). + If the CAPABILITIES command is not supported, an empty dict is + returned.""" + return self._caps + def set_debuglevel(self, level): """Set the debugging level. Argument 'level' means: 0: no debugging output (default) @@ -186,121 +386,221 @@ class NNTP: self.debugging = level debug = set_debuglevel - def putline(self, line): - """Internal: send one line to the server, appending CRLF.""" - line = line + CRLF + def _putline(self, line): + """Internal: send one line to the server, appending CRLF. + The `line` must be a bytes-like object.""" + line = line + _CRLF if self.debugging > 1: print('*put*', repr(line)) - self.sock.sendall(line) + self.file.write(line) + self.file.flush() - def putcmd(self, line): - """Internal: send one command to the server (through putline()).""" + def _putcmd(self, line): + """Internal: send one command to the server (through _putline()). + The `line` must be an unicode string.""" if self.debugging: print('*cmd*', repr(line)) - line = bytes(line, "ASCII") - self.putline(line) + line = line.encode(self.encoding, self.errors) + self._putline(line) - def getline(self): - """Internal: return one line from the server, stripping CRLF. - Raise EOFError if the connection is closed.""" + def _getline(self, strip_crlf=True): + """Internal: return one line from the server, stripping _CRLF. + Raise EOFError if the connection is closed. + Returns a bytes object.""" line = self.file.readline() if self.debugging > 1: print('*get*', repr(line)) if not line: raise EOFError - if line[-2:] == CRLF: - line = line[:-2] - elif line[-1:] in CRLF: - line = line[:-1] + if strip_crlf: + if line[-2:] == _CRLF: + line = line[:-2] + elif line[-1:] in _CRLF: + line = line[:-1] return line - def getresp(self): + def _getresp(self): """Internal: get a response from the server. - Raise various errors if the response indicates an error.""" - resp = self.getline() + Raise various errors if the response indicates an error. + Returns an unicode string.""" + resp = self._getline() if self.debugging: print('*resp*', repr(resp)) + resp = resp.decode(self.encoding, self.errors) c = resp[:1] - if c == b'4': + if c == '4': raise NNTPTemporaryError(resp) - if c == b'5': + if c == '5': raise NNTPPermanentError(resp) - if c not in b'123': + if c not in '123': raise NNTPProtocolError(resp) return resp - def getlongresp(self, file=None): + def _getlongresp(self, file=None): """Internal: get a response plus following text from the server. - Raise various errors if the response indicates an error.""" + Raise various errors if the response indicates an error. + + Returns a (response, lines) tuple where `response` is an unicode + string and `lines` is a list of bytes objects. + If `file` is a file-like object, it must be open in binary mode. + """ openedFile = None try: # If a string was passed then open a file with that name - if isinstance(file, str): - openedFile = file = open(file, "w") + if isinstance(file, (str, bytes)): + openedFile = file = open(file, "wb") - resp = self.getresp() - if resp[:3] not in LONGRESP: + resp = self._getresp() + if resp[:3] not in _LONGRESP: raise NNTPReplyError(resp) - list = [] - while 1: - line = self.getline() - if line == b'.': - break - if line.startswith(b'..'): - line = line[1:] - if file: - file.write(line + b'\n') - else: - list.append(line) + + lines = [] + if file is not None: + # XXX lines = None instead? + terminators = (b'.' + _CRLF, b'.\n') + while 1: + line = self._getline(False) + if line in terminators: + break + if line.startswith(b'..'): + line = line[1:] + file.write(line) + else: + terminator = b'.' + while 1: + line = self._getline() + if line == terminator: + break + if line.startswith(b'..'): + line = line[1:] + lines.append(line) finally: # If this method created the file, then it must close it if openedFile: openedFile.close() - return resp, list + return resp, lines - def shortcmd(self, line): - """Internal: send a command and get the response.""" - self.putcmd(line) - return self.getresp() + def _shortcmd(self, line): + """Internal: send a command and get the response. + Same return value as _getresp().""" + self._putcmd(line) + return self._getresp() + + def _longcmd(self, line, file=None): + """Internal: send a command and get the response plus following text. + Same return value as _getlongresp().""" + self._putcmd(line) + return self._getlongresp(file) + + def _longcmdstring(self, line, file=None): + """Internal: send a command and get the response plus following text. + Same as _longcmd() and _getlongresp(), except that the returned `lines` + are unicode strings rather than bytes objects. + """ + self._putcmd(line) + resp, list = self._getlongresp(file) + return resp, [line.decode(self.encoding, self.errors) + for line in list] + + def _getoverviewfmt(self): + """Internal: get the overview format. Queries the server if not + already done, else returns the cached value.""" + try: + return self._cachedoverviewfmt + except AttributeError: + pass + try: + resp, lines = self._longcmdstring("LIST OVERVIEW.FMT") + except NNTPPermanentError: + # Not supported by server? + fmt = _DEFAULT_OVERVIEW_FMT[:] + else: + fmt = _parse_overview_fmt(lines) + self._cachedoverviewfmt = fmt + return fmt - def longcmd(self, line, file=None): - """Internal: send a command and get the response plus following text.""" - self.putcmd(line) - return self.getlongresp(file) + def _grouplist(self, lines): + # Parse lines into "group last first flag" + return [GroupInfo(*line.split()) for line in lines] - def newgroups(self, date, time, file=None): - """Process a NEWGROUPS command. Arguments: - - date: string 'yymmdd' indicating the date - - time: string 'hhmmss' indicating the time + def capabilities(self): + """Process a CAPABILITIES command. Not supported by all servers. Return: - resp: server response if successful - - list: list of newsgroup names""" - - return self.longcmd('NEWGROUPS ' + date + ' ' + time, file) + - caps: a dictionary mapping capability names to lists of tokens + (for example {'VERSION': ['2'], 'OVER': [], LIST: ['ACTIVE', 'HEADERS'] }) + """ + caps = {} + resp, lines = self._longcmdstring("CAPABILITIES") + for line in lines: + name, *tokens = line.split() + caps[name] = tokens + return resp, caps - def newnews(self, group, date, time, file=None): + def newgroups(self, date, *, file=None): + """Process a NEWGROUPS command. Arguments: + - date: a date or datetime object + Return: + - resp: server response if successful + - list: list of newsgroup names + """ + if not isinstance(date, (datetime.date, datetime.date)): + raise TypeError( + "the date parameter must be a date or datetime object, " + "not '{:40}'".format(date.__class__.__name__)) + date_str, time_str = _unparse_datetime(date, self.nntp_version < 2) + cmd = 'NEWGROUPS {0} {1}'.format(date_str, time_str) + resp, lines = self._longcmdstring(cmd, file) + return resp, self._grouplist(lines) + + def newnews(self, group, date, *, file=None): """Process a NEWNEWS command. Arguments: - group: group name or '*' - - date: string 'yymmdd' indicating the date - - time: string 'hhmmss' indicating the time + - date: a date or datetime object Return: - resp: server response if successful - - list: list of message ids""" - - cmd = 'NEWNEWS ' + group + ' ' + date + ' ' + time - return self.longcmd(cmd, file) - - def list(self, file=None): - """Process a LIST command. Return: + - list: list of message ids + """ + if not isinstance(date, (datetime.date, datetime.date)): + raise TypeError( + "the date parameter must be a date or datetime object, " + "not '{:40}'".format(date.__class__.__name__)) + date_str, time_str = _unparse_datetime(date, self.nntp_version < 2) + cmd = 'NEWNEWS {0} {1} {2}'.format(group, date_str, time_str) + return self._longcmdstring(cmd, file) + + def list(self, *, file=None): + """Process a LIST command. Argument: + - file: Filename string or file object to store the result in + Returns: - resp: server response if successful - - list: list of (group, last, first, flag) (strings)""" + - list: list of (group, last, first, flag) (strings) + """ + resp, lines = self._longcmdstring('LIST', file) + return resp, self._grouplist(lines) - resp, list = self.longcmd('LIST', file) - for i in range(len(list)): - # Parse lines into "group last first flag" - list[i] = tuple(list[i].split()) - return resp, list + def _getdescriptions(self, group_pattern, return_all): + line_pat = re.compile('^(?P<group>[^ \t]+)[ \t]+(.*)$') + # Try the more std (acc. to RFC2980) LIST NEWSGROUPS first + resp, lines = self._longcmdstring('LIST NEWSGROUPS ' + group_pattern) + if not resp.startswith('215'): + # Now the deprecated XGTITLE. This either raises an error + # or succeeds with the same output structure as LIST + # NEWSGROUPS. + resp, lines = self._longcmdstring('XGTITLE ' + group_pattern) + groups = {} + for raw_line in lines: + match = line_pat.search(raw_line.strip()) + if match: + name, desc = match.group(1, 2) + if not return_all: + return desc + groups[name] = desc + if return_all: + return resp, groups + else: + # Nothing found + return '' def description(self, group): - """Get a description for a single group. If more than one group matches ('group' is a pattern), return the first. If no group matches, return an empty string. @@ -311,42 +611,24 @@ class NNTP: NOTE: This neither checks for a wildcard in 'group' nor does it check whether the group actually exists.""" - - resp, lines = self.descriptions(group) - if len(lines) == 0: - return b'' - else: - return lines[0][1] + return self._getdescriptions(group, False) def descriptions(self, group_pattern): """Get descriptions for a range of groups.""" - line_pat = re.compile(b'^(?P<group>[^ \t]+)[ \t]+(.*)$') - # Try the more std (acc. to RFC2980) LIST NEWSGROUPS first - resp, raw_lines = self.longcmd('LIST NEWSGROUPS ' + group_pattern) - if not resp.startswith(b'215'): - # Now the deprecated XGTITLE. This either raises an error - # or succeeds with the same output structure as LIST - # NEWSGROUPS. - resp, raw_lines = self.longcmd('XGTITLE ' + group_pattern) - lines = [] - for raw_line in raw_lines: - match = line_pat.search(raw_line.strip()) - if match: - lines.append(match.group(1, 2)) - return resp, lines + return self._getdescriptions(group_pattern, True) def group(self, name): """Process a GROUP command. Argument: - group: the group name Returns: - resp: server response if successful - - count: number of articles (string) - - first: first article number (string) - - last: last article number (string) - - name: the group name""" - - resp = self.shortcmd('GROUP ' + name) - if not resp.startswith(b'211'): + - count: number of articles + - first: first article number + - last: last article number + - name: the group name + """ + resp = self._shortcmd('GROUP ' + name) + if not resp.startswith('211'): raise NNTPReplyError(resp) words = resp.split() count = first = last = 0 @@ -359,151 +641,177 @@ class NNTP: last = words[3] if n > 4: name = words[4].lower() - return resp, count, first, last, name + return resp, int(count), int(first), int(last), name - def help(self, file=None): - """Process a HELP command. Returns: + def help(self, *, file=None): + """Process a HELP command. Argument: + - file: Filename string or file object to store the result in + Returns: - resp: server response if successful - - list: list of strings""" - - return self.longcmd('HELP',file) + - list: list of strings returned by the server in response to the + HELP command + """ + return self._longcmdstring('HELP', file) - def statparse(self, resp): - """Internal: parse the response of a STAT, NEXT or LAST command.""" - if not resp.startswith(b'22'): + def _statparse(self, resp): + """Internal: parse the response line of a STAT, NEXT, LAST, + ARTICLE, HEAD or BODY command.""" + if not resp.startswith('22'): raise NNTPReplyError(resp) words = resp.split() - nr = 0 - id = b'' - n = len(words) - if n > 1: - nr = words[1] - if n > 2: - id = words[2] - return resp, nr, id + art_num = int(words[1]) + message_id = words[2] + return resp, art_num, message_id - def statcmd(self, line): + def _statcmd(self, line): """Internal: process a STAT, NEXT or LAST command.""" - resp = self.shortcmd(line) - return self.statparse(resp) + resp = self._shortcmd(line) + return self._statparse(resp) - def stat(self, id): + def stat(self, message_spec=None): """Process a STAT command. Argument: - - id: article number or message id + - message_spec: article number or message id (if not specified, + the current article is selected) Returns: - resp: server response if successful - - nr: the article number - - id: the message id""" - - return self.statcmd('STAT {0}'.format(id)) + - art_num: the article number + - message_id: the message id + """ + if message_spec: + return self._statcmd('STAT {0}'.format(message_spec)) + else: + return self._statcmd('STAT') def next(self): """Process a NEXT command. No arguments. Return as for STAT.""" - return self.statcmd('NEXT') + return self._statcmd('NEXT') def last(self): """Process a LAST command. No arguments. Return as for STAT.""" - return self.statcmd('LAST') + return self._statcmd('LAST') - def artcmd(self, line, file=None): + def _artcmd(self, line, file=None): """Internal: process a HEAD, BODY or ARTICLE command.""" - resp, list = self.longcmd(line, file) - resp, nr, id = self.statparse(resp) - return resp, nr, id, list + resp, lines = self._longcmd(line, file) + resp, art_num, message_id = self._statparse(resp) + return resp, ArticleInfo(art_num, message_id, lines) - def head(self, id): + def head(self, message_spec=None, *, file=None): """Process a HEAD command. Argument: - - id: article number or message id + - message_spec: article number or message id + - file: filename string or file object to store the headers in Returns: - resp: server response if successful - - nr: article number - - id: message id - - list: the lines of the article's header""" - - return self.artcmd('HEAD {0}'.format(id)) + - ArticleInfo: (article number, message id, list of header lines) + """ + if message_spec is not None: + cmd = 'HEAD {0}'.format(message_spec) + else: + cmd = 'HEAD' + return self._artcmd(cmd, file) - def body(self, id, file=None): + def body(self, message_spec=None, *, file=None): """Process a BODY command. Argument: - - id: article number or message id - - file: Filename string or file object to store the article in + - message_spec: article number or message id + - file: filename string or file object to store the body in Returns: - resp: server response if successful - - nr: article number - - id: message id - - list: the lines of the article's body or an empty list - if file was used""" - - return self.artcmd('BODY {0}'.format(id), file) + - ArticleInfo: (article number, message id, list of body lines) + """ + if message_spec is not None: + cmd = 'BODY {0}'.format(message_spec) + else: + cmd = 'BODY' + return self._artcmd(cmd, file) - def article(self, id): + def article(self, message_spec=None, *, file=None): """Process an ARTICLE command. Argument: - - id: article number or message id + - message_spec: article number or message id + - file: filename string or file object to store the article in Returns: - resp: server response if successful - - nr: article number - - id: message id - - list: the lines of the article""" - - return self.artcmd('ARTICLE {0}'.format(id)) + - ArticleInfo: (article number, message id, list of article lines) + """ + if message_spec is not None: + cmd = 'ARTICLE {0}'.format(message_spec) + else: + cmd = 'ARTICLE' + return self._artcmd(cmd, file) def slave(self): """Process a SLAVE command. Returns: - - resp: server response if successful""" - - return self.shortcmd('SLAVE') + - resp: server response if successful + """ + return self._shortcmd('SLAVE') - def xhdr(self, hdr, str, file=None): + def xhdr(self, hdr, str, *, file=None): """Process an XHDR command (optional server extension). Arguments: - hdr: the header type (e.g. 'subject') - str: an article nr, a message id, or a range nr1-nr2 + - file: Filename string or file object to store the result in Returns: - resp: server response if successful - - list: list of (nr, value) strings""" - - pat = re.compile(b'^([0-9]+) ?(.*)\n?') - resp, lines = self.longcmd('XHDR {0} {1}'.format(hdr, str), file) - for i in range(len(lines)): - line = lines[i] + - list: list of (nr, value) strings + """ + pat = re.compile('^([0-9]+) ?(.*)\n?') + resp, lines = self._longcmdstring('XHDR {0} {1}'.format(hdr, str), file) + def remove_number(line): m = pat.match(line) - if m: - lines[i] = m.group(1, 2) - return resp, lines + return m.group(1, 2) if m else line + return resp, [remove_number(line) for line in lines] - def xover(self, start, end, file=None): + def xover(self, start, end, *, file=None): """Process an XOVER command (optional server extension) Arguments: - start: start of range - end: end of range + - file: Filename string or file object to store the result in + Returns: + - resp: server response if successful + - list: list of dicts containing the response fields + """ + resp, lines = self._longcmdstring('XOVER {0}-{1}'.format(start, end), + file) + fmt = self._getoverviewfmt() + return resp, _parse_overview(lines, fmt) + + def over(self, message_spec, *, file=None): + """Process an OVER command. If the command isn't supported, fall + back to XOVER. Arguments: + - message_spec: + - either a message id, indicating the article to fetch + information about + - or a (start, end) tuple, indicating a range of article numbers; + if end is None, information up to the newest message will be + retrieved + - or None, indicating the current article number must be used + - file: Filename string or file object to store the result in Returns: - resp: server response if successful - - list: list of (art-nr, subject, poster, date, - id, references, size, lines)""" + - list: list of dicts containing the response fields - resp, lines = self.longcmd('XOVER {0}-{1}'.format(start, end), file) - xover_lines = [] - for line in lines: - elem = line.split(b'\t') - try: - xover_lines.append((elem[0], - elem[1], - elem[2], - elem[3], - elem[4], - elem[5].split(), - elem[6], - elem[7])) - except IndexError: - raise NNTPDataError(line) - return resp,xover_lines - - def xgtitle(self, group, file=None): + NOTE: the "message id" form isn't supported by XOVER + """ + cmd = 'OVER' if 'OVER' in self._caps else 'XOVER' + if isinstance(message_spec, (tuple, list)): + start, end = message_spec + cmd += ' {0}-{1}'.format(start, end or '') + elif message_spec is not None: + cmd = cmd + ' ' + message_spec + resp, lines = self._longcmdstring(cmd, file) + fmt = self._getoverviewfmt() + return resp, _parse_overview(lines, fmt) + + def xgtitle(self, group, *, file=None): """Process an XGTITLE command (optional server extension) Arguments: - group: group name wildcard (i.e. news.*) Returns: - resp: server response if successful - list: list of (name,title) strings""" - - line_pat = re.compile(b'^([^ \t]+)[ \t]+(.*)$') - resp, raw_lines = self.longcmd('XGTITLE ' + group, file) + warnings.warn("The XGTITLE extension is not actively used, " + "use descriptions() instead", + PendingDeprecationWarning, 2) + line_pat = re.compile('^([^ \t]+)[ \t]+(.*)$') + resp, raw_lines = self._longcmdstring('XGTITLE ' + group, file) lines = [] for raw_line in raw_lines: match = line_pat.search(raw_line.strip()) @@ -511,15 +819,18 @@ class NNTP: lines.append(match.group(1, 2)) return resp, lines - def xpath(self,id): + def xpath(self, id): """Process an XPATH command (optional server extension) Arguments: - id: Message id of article Returns: resp: server response if successful - path: directory path to article""" + path: directory path to article + """ + warnings.warn("The XPATH extension is not actively used", + PendingDeprecationWarning, 2) - resp = self.shortcmd('XPATH {0}'.format(id)) - if not resp.startswith(b'223'): + resp = self._shortcmd('XPATH {0}'.format(id)) + if not resp.startswith('223'): raise NNTPReplyError(resp) try: [resp_num, path] = resp.split() @@ -528,89 +839,144 @@ class NNTP: else: return resp, path - def date (self): - """Process the DATE command. Arguments: - None + def date(self): + """Process the DATE command. Returns: - resp: server response if successful - date: Date suitable for newnews/newgroups commands etc. - time: Time suitable for newnews/newgroups commands etc.""" - - resp = self.shortcmd("DATE") - if not resp.startswith(b'111'): + - resp: server response if successful + - date: datetime object + """ + resp = self._shortcmd("DATE") + if not resp.startswith('111'): raise NNTPReplyError(resp) elem = resp.split() if len(elem) != 2: raise NNTPDataError(resp) - date = elem[1][2:8] - time = elem[1][-6:] - if len(date) != 6 or len(time) != 6: + date = elem[1] + if len(date) != 14: raise NNTPDataError(resp) - return resp, date, time + return resp, _parse_datetime(date, None) def _post(self, command, f): - resp = self.shortcmd(command) - # Raises error_??? if posting is not allowed - if not resp.startswith(b'3'): + resp = self._shortcmd(command) + # Raises a specific exception if posting is not allowed + if not resp.startswith('3'): raise NNTPReplyError(resp) - while 1: - line = f.readline() - if not line: - break - if line.endswith(b'\n'): - line = line[:-1] + if isinstance(f, (bytes, bytearray)): + f = f.splitlines() + # We don't use _putline() because: + # - we don't want additional CRLF if the file or iterable is already + # in the right format + # - we don't want a spurious flush() after each line is written + for line in f: + if not line.endswith(_CRLF): + line = line.rstrip(b"\r\n") + _CRLF if line.startswith(b'.'): line = b'.' + line - self.putline(line) - self.putline(b'.') - return self.getresp() + self.file.write(line) + self.file.write(b".\r\n") + self.file.flush() + return self._getresp() - def post(self, f): + def post(self, data): """Process a POST command. Arguments: - - f: file containing the article + - data: bytes object, iterable or file containing the article Returns: - resp: server response if successful""" - return self._post('POST', f) + return self._post('POST', data) - def ihave(self, id, f): + def ihave(self, message_id, data): """Process an IHAVE command. Arguments: - - id: message-id of the article - - f: file containing the article + - message_id: message-id of the article + - data: file containing the article Returns: - resp: server response if successful Note that if the server refuses the article an exception is raised.""" - return self._post('IHAVE {0}'.format(id), f) + return self._post('IHAVE {0}'.format(message_id), data) + + def _close(self): + self.file.close() + del self.file def quit(self): """Process a QUIT command and close the socket. Returns: - resp: server response if successful""" - - resp = self.shortcmd('QUIT') - self.file.close() - self.sock.close() - del self.file, self.sock + try: + resp = self._shortcmd('QUIT') + finally: + self._close() return resp +class NNTP(_NNTPBase): + + def __init__(self, host, port=NNTP_PORT, user=None, password=None, + readermode=None, usenetrc=True, + timeout=_GLOBAL_DEFAULT_TIMEOUT): + """Initialize an instance. Arguments: + - host: hostname to connect to + - port: port to connect to (default the standard NNTP port) + - user: username to authenticate with + - password: password to use with username + - readermode: if true, send 'mode reader' command after + connecting. + - usenetrc: allow loading username and password from ~/.netrc file + if not specified explicitly + - timeout: timeout (in seconds) used for socket connections + + readermode is sometimes necessary if you are connecting to an + NNTP server on the local machine and intend to call + reader-specific comamnds, such as `group'. If you get + unexpected NNTPPermanentErrors, you might need to set + readermode. + """ + self.host = host + self.port = port + self.sock = socket.create_connection((host, port), timeout) + file = self.sock.makefile("rwb") + _NNTPBase.__init__(self, file, user, password, + readermode, usenetrc, timeout) + + def _close(self): + try: + _NNTPBase._close(self) + finally: + self.sock.close() + + # Test retrieval when run as a script. -# Assumption: if there's a local news server, it's called 'news'. -# Assumption: if user queries a remote news server, it's named -# in the environment variable NNTPSERVER (used by slrn and kin) -# and we want readermode off. if __name__ == '__main__': - import os - newshost = 'news' and os.environ["NNTPSERVER"] - if newshost.find('.') == -1: - mode = 'readermode' - else: - mode = None - s = NNTP(newshost, readermode=mode) - resp, count, first, last, name = s.group('comp.lang.python') - print(resp) + import argparse + from email.utils import parsedate + + parser = argparse.ArgumentParser(description="""\ + nntplib built-in demo - display the latest articles in a newsgroup""") + parser.add_argument('-g', '--group', default='gmane.comp.python.general', + help='group to fetch messages from (default: %(default)s)') + parser.add_argument('-s', '--server', default='news.gmane.org', + help='NNTP server hostname (default: %(default)s)') + parser.add_argument('-p', '--port', default=NNTP_PORT, type=int, + help='NNTP port number (default: %(default)s)') + parser.add_argument('-n', '--nb-articles', default=10, type=int, + help='number of articles to fetch (default: %(default)s)') + args = parser.parse_args() + + s = NNTP(host=args.server, port=args.port) + resp, count, first, last, name = s.group(args.group) print('Group', name, 'has', count, 'articles, range', first, 'to', last) - resp, subs = s.xhdr('subject', '{0}-{1}'.format(first, last)) - print(resp) - for item in subs: - print("%7s %s" % item) - resp = s.quit() - print(resp) + + def cut(s, lim): + if len(s) > lim: + s = s[:lim - 4] + "..." + return s + + first = str(int(last) - args.nb_articles + 1) + resp, overviews = s.xover(first, last) + for artnum, over in overviews: + author = decode_header(over['from']).split('<', 1)[0] + subject = decode_header(over['subject']) + lines = int(over[':lines']) + print("{:7} {:20} {:42} ({})".format( + artnum, cut(author, 20), cut(subject, 42), lines) + ) + + s.quit() |