summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFred Drake <fdrake@acm.org>2000-03-31 17:51:10 (GMT)
committerFred Drake <fdrake@acm.org>2000-03-31 17:51:10 (GMT)
commit3c9f936eee8bd826b631ee886ec3bb063da35fbd (patch)
tree54b7c58530933c9da34a6e63a372229202fe2826
parent32abe6f7d0a82358efd0494992f3c388d7b24036 (diff)
downloadcpython-3c9f936eee8bd826b631ee886ec3bb063da35fbd.zip
cpython-3c9f936eee8bd826b631ee886ec3bb063da35fbd.tar.gz
cpython-3c9f936eee8bd826b631ee886ec3bb063da35fbd.tar.bz2
Two new sections. Preliminary.
-rw-r--r--Doc/lib/lib.tex2
-rw-r--r--Doc/lib/librobotparser.tex68
-rw-r--r--Doc/lib/libzipfile.tex169
3 files changed, 239 insertions, 0 deletions
diff --git a/Doc/lib/lib.tex b/Doc/lib/lib.tex
index 33bb9c5..2750984 100644
--- a/Doc/lib/lib.tex
+++ b/Doc/lib/lib.tex
@@ -165,6 +165,7 @@ and how to embed it in other applications.
\input{libbsddb}
\input{libzlib}
\input{libgzip}
+\input{libzipfile}
\input{librlcompleter}
\input{libunix} % UNIX Specific Services
@@ -230,6 +231,7 @@ and how to embed it in other applications.
\input{libmhlib}
\input{libmimify}
\input{libnetrc}
+\input{librobotparser}
\input{librestricted}
\input{librexec}
diff --git a/Doc/lib/librobotparser.tex b/Doc/lib/librobotparser.tex
new file mode 100644
index 0000000..bf35fac
--- /dev/null
+++ b/Doc/lib/librobotparser.tex
@@ -0,0 +1,68 @@
+\section{\module{robotparser} ---
+ Parser for \filenq{robots.txt}}
+
+\declaremodule{standard}{robotparser}
+\modulesynopsis{Accepts as input a list of lines or a URL that refers to a
+ robots.txt file, parses the file, then builds a
+ set of rules from that list and answers questions
+ about fetchability of other URLs.}
+\sectionauthor{Skip Montanaro}{skip@mojam.com}
+
+\index{WWW}
+\index{World-Wide Web}
+\index{URL}
+\index{robots.txt}
+
+This module provides a single class, \class{RobotFileParser}, which answers
+questions about whether or not a particular user agent can fetch a URL on
+the web site that published the \file{robots.txt} file. For more details on
+the structure of \file{robots.txt} files, see
+\url{http://info.webcrawler.com/mak/projects/robots/norobots.html}.
+
+\begin{classdesc}{RobotFileParser}{}
+
+This class provides a set of methods to read, parse and answer questions
+about a single \file{robots.txt} file.
+
+\begin{methoddesc}{set_url}{url}
+Sets the URL referring to a \file{robots.txt} file.
+\end{methoddesc}
+
+\begin{methoddesc}{read}{}
+Reads the \file{robots.txt} URL and feeds it to the parser.
+\end{methoddesc}
+
+\begin{methoddesc}{parse}{lines}
+Parses the \var{lines} argument.
+\end{methoddesc}
+
+\begin{methoddesc}{can_fetch}{useragent, url}
+Returns true if the \var{useragent} is allowed to fetch the \var{url}
+according to the rules contained in the parsed \file{robots.txt} file.
+\end{methoddesc}
+
+\begin{methoddesc}{mtime}{}
+Returns the time the \file{robots.txt} file was last fetched. This is
+useful for long-running web spiders that need to check for new
+\file{robots.txt} files periodically.
+\end{methoddesc}
+
+\begin{methoddesc}{modified}{}
+Sets the time the \file{robots.txt} file was last fetched to the current
+time.
+\end{methoddesc}
+
+\end{classdesc}
+
+The following example demonstrates basic use of the \class{RobotFileParser} class.
+
+\begin{verbatim}
+>>> import robotparser
+>>> rp = robotparser.RobotFileParser()
+>>> rp.set_url("http://www.musi-cal.com/robots.txt")
+>>> rp.read()
+>>> rp.can_fetch("*", "http://www.musi-cal.com/cgi-bin/search?city=San+Francisco")
+0
+>>> rp.can_fetch("*", "http://www.musi-cal.com/")
+1
+\end{verbatim}
diff --git a/Doc/lib/libzipfile.tex b/Doc/lib/libzipfile.tex
new file mode 100644
index 0000000..0b1d43d
--- /dev/null
+++ b/Doc/lib/libzipfile.tex
@@ -0,0 +1,169 @@
+\section{\module{zipfile} ---
+ Work with ZIP archives}
+
+\modulesynopsis{Read and write ZIP-format archive files.}
+\moduleauthor{James C. Ahlstrom}{jim@interet.com}
+\sectionauthor{James C. Ahlstrom}{jim@interet.com}
+% LaTeX markup by Fred L. Drake, Jr. <fdrake@acm.org>
+
+The ZIP file format is a common archive and compression standard.
+This module provides tools to create, read, write, append, and list a
+ZIP file.
+
+The available attributes of this module are:
+
+\begin{excdesc}{error}
+ The error raised for bad ZIP files.
+\end{excdesc}
+
+\begin{datadesc}{_debug}
+ Level of printing, defaults to \code{1}.
+\end{datadesc}
+
+\begin{classdesc}{ZipFile}{...}
+ The class for reading and writing ZIP files. See
+ ``\citetitle{ZipFile Objects}'' (section~\ref{zipfile-objects}) for
+ constructor details.
+\end{classdesc}
+
+\begin{funcdesc}{is_zipfile}{path}
+ Returns true if \var{path} is a valid ZIP file based on its magic
+ number, otherwise returns false. This module does not currently
+ handle ZIP files which have appended comments.
+\end{funcdesc}
+
+\begin{funcdesc}{zip2date}{zdate}
+ Return \code{(\var{year}, \var{month}, \var{day})} for a ZIP date
+ code.
+\end{funcdesc}
+
+\begin{funcdesc}{zip2time}{ztime}
+ Return \code{(\var{hour}, \var{minute}, \var{second})} for a ZIP
+ time code.
+\end{funcdesc}
+
+\begin{funcdesc}{date2zip}{year, month, day}
+ Return a ZIP date code.
+\end{funcdesc}
+
+\begin{funcdesc}{time2zip}{hour, minute, second}
+ Return a ZIP time code.
+\end{funcdesc}
+
+\begin{datadesc}{ZIP_STORED}
+ The numeric constant (\code{0}) for an uncompressed archive member.
+\end{datadesc}
+
+\begin{datadesc}{ZIP_DEFLATED}
+ The numeric constant for the usual ZIP compression method. This
+ requires the \module{zlib} module. No other compression methods are
+ currently supported.
+\end{datadesc}
+
+
+\begin{seealso}
+ \seetext{XXX point to ZIP format definition}
+ \seetext{XXX point to Info-ZIP home page; mention WiZ}
+\end{seealso}
+
+
+\subsection{ZipFile Objects \label{zipfile-objects}}
+
+\begin{classdesc}{ZipFile}{filename\optional{, mode\optional{, compression}}}
+ Open a ZIP file named \var{filename}. The \var{mode} parameter
+ should be \code{'r'} to read an existing file, \code{'w'} to
+ truncate and write a new file, or \code{'a'} to append to an
+ existing file. If \var{mode} is \code{'a'} and \var{filename}
+ refers to an existing ZIP file, then additional files are added to
+ it. If \var{filename} does not refer to a ZIP file, then a new ZIP
+ archive is appended to the file. This is meant for adding a ZIP
+ archive to another file, such as \file{python.exe}. Using
+\begin{verbatim}
+cat myzip.zip >> python.exe
+\end{verbatim}
+ also works, and at least \program{WinZip} can read such files.
+ \var{compression} is the ZIP compression method to use when writing
+ the archive, and should be \constant{ZIP_STORED} or
+ \constant{ZIP_DEFLATED}; unrecognized values will cause
+ \exception{ValueError} to be raised. The default is
+ \constant{ZIP_STORED}.
+\end{classdesc}
+
+XXX explain the ``extra'' string for the ZIP format
+
+\begin{memberdesc}{TOC}
+ A read-only dictionary whose keys are the names in the archive, and
+ whose values are tuples as follows:
+
+\begin{tableii}{c|l}{code}{Index}{Meaning}
+ \lineii{0}{File data seek offset}
+ \lineii{1}{ZIP file ``extra'' data as a string}
+ \lineii{2}{ZIP file bit flags}
+ \lineii{3}{ZIP file compression type}
+ \lineii{4}{File modification time in DOS format}
+ \lineii{5}{File modification date in DOS format}
+ \lineii{6}{The CRC-32 of the uncompressed data}
+ \lineii{7}{The compressed size of the file}
+ \lineii{8}{The uncompressed size of the file}
+\end{tableii}
+\end{memberdesc}
+
+The \class{ZipFile} class has these methods:
+
+\begin{methoddesc}{listdir}{}
+ Return a list of names in the archive. Equivalent to
+ \code{\var{zipfile}.TOC.keys()}.
+\end{methoddesc}
+
+\begin{methoddesc}{printdir}{}
+ Print a table of contents for the archive to standard output.
+\end{methoddesc}
+
+\begin{methoddesc}{read}{name}
+ Return the bytes of the file \var{name} in the archive. The archive must be
+ open for read or append.
+\end{methoddesc}
+
+\begin{methoddesc}{writestr}{bytes, arcname, year, month, day, hour,
+ minute, second\optional{, extra}}
+ Write the string \var{bytes} and the other data to the archive, and
+ give the archive member the name \var{arcname}. \var{extra} is the
+ ZIP extra data string. The archive must be opened with mode
+ \code{'w'} or \code{'a'}.
+\end{methoddesc}
+
+\begin{methoddesc}{write}{filename, arcname\optional{, extra}}
+ Write the file named \var{filename} to the archive, giving it the
+ archive name \var{arcname}. \var{extra} is the ZIP extra data
+ string. The archive must be open with mode \code{'w'} or
+ \code{'a'}.
+\end{methoddesc}
+
+\begin{methoddesc}{writepy}{pathname\optional{, basename}}
+ Search for files \file{*.py} and add the corresponding file to the
+ archive. The corresponding file is a \file{*.pyo} file if
+ available, else a \file{*.pyc} file, compiling if necessary. If the
+ \var{pathname} is a file, the filename must end with \file{.py}, and just
+ the corresponding \file{*.py[oc]} file is added at the top level
+ (no path information). If it is a directory, and the directory is
+ not a package directory, then all the files \file{*.py[oc]} are
+ added at the top level. If the directory is a package directory,
+ then all \file{*.py[oc]} are added under the package name as a file
+ path, and if any subdirectories are package directories, all of
+ these are added recursively. \var{basename} is intended for
+ internal use only. The \method{writepy()} method makes archives
+ with file names like this:
+
+\begin{verbatim}
+ string.pyc # Top level name
+ test/__init__.pyc # Package directory
+ test/testall.pyc # Module test.testall
+ test/bogus/__init__.pyc # Subpackage directory
+ test/bogus/myfile.pyc # Submodule test.bogus.myfile
+\end{verbatim}
+\end{methoddesc}
+
+\begin{methoddesc}{close}{}
+ Close the archive file. You must call \method{close()} before
+ exiting your program or essential records will not be written.
+\end{methoddesc}