Diffstat (limited to 'Doc/lib/librobotparser.tex')
-rw-r--r-- | Doc/lib/librobotparser.tex | 66 |
1 files changed, 0 insertions, 66 deletions
diff --git a/Doc/lib/librobotparser.tex b/Doc/lib/librobotparser.tex
deleted file mode 100644
index 5eac528..0000000
--- a/Doc/lib/librobotparser.tex
+++ /dev/null
@@ -1,66 +0,0 @@
-\section{\module{robotparser} ---
-         Parser for robots.txt}
-
-\declaremodule{standard}{robotparser}
-\modulesynopsis{Loads a \protect\file{robots.txt} file and
-                answers questions about fetchability of other URLs.}
-\sectionauthor{Skip Montanaro}{skip@mojam.com}
-
-\index{WWW}
-\index{World Wide Web}
-\index{URL}
-\index{robots.txt}
-
-This module provides a single class, \class{RobotFileParser}, which answers
-questions about whether or not a particular user agent can fetch a URL on
-the Web site that published the \file{robots.txt} file. For more details on
-the structure of \file{robots.txt} files, see
-\url{http://www.robotstxt.org/wc/norobots.html}.
-
-\begin{classdesc}{RobotFileParser}{}
-
-This class provides a set of methods to read, parse and answer questions
-about a single \file{robots.txt} file.
-
-\begin{methoddesc}{set_url}{url}
-Sets the URL referring to a \file{robots.txt} file.
-\end{methoddesc}
-
-\begin{methoddesc}{read}{}
-Reads the \file{robots.txt} URL and feeds it to the parser.
-\end{methoddesc}
-
-\begin{methoddesc}{parse}{lines}
-Parses the lines argument.
-\end{methoddesc}
-
-\begin{methoddesc}{can_fetch}{useragent, url}
-Returns \code{True} if the \var{useragent} is allowed to fetch the \var{url}
-according to the rules contained in the parsed \file{robots.txt} file.
-\end{methoddesc}
-
-\begin{methoddesc}{mtime}{}
-Returns the time the \code{robots.txt} file was last fetched. This is
-useful for long-running web spiders that need to check for new
-\code{robots.txt} files periodically.
-\end{methoddesc}
-
-\begin{methoddesc}{modified}{}
-Sets the time the \code{robots.txt} file was last fetched to the current
-time.
-\end{methoddesc}
-
-\end{classdesc}
-
-The following example demonstrates basic use of the RobotFileParser class.
-
-\begin{verbatim}
->>> import robotparser
->>> rp = robotparser.RobotFileParser()
->>> rp.set_url("http://www.musi-cal.com/robots.txt")
->>> rp.read()
->>> rp.can_fetch("*", "http://www.musi-cal.com/cgi-bin/search?city=San+Francisco")
-False
->>> rp.can_fetch("*", "http://www.musi-cal.com/")
-True
-\end{verbatim}
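The API documented in the deleted LaTeX section is unchanged in current Python, where the module lives at urllib.robotparser. Below is a minimal sketch of the same usage on Python 3; the musi-cal.com URLs are carried over from the original example and are illustrative only, so the actual can_fetch() results depend on whatever robots.txt that site serves today.

    # Minimal sketch: same RobotFileParser workflow as the removed example,
    # using the Python 3 module name urllib.robotparser.
    from urllib.robotparser import RobotFileParser

    rp = RobotFileParser()
    rp.set_url("http://www.musi-cal.com/robots.txt")
    rp.read()  # fetch the robots.txt file and feed it to the parser

    # can_fetch() answers whether the given user agent may retrieve a URL
    # according to the rules in the parsed robots.txt file.
    print(rp.can_fetch("*", "http://www.musi-cal.com/cgi-bin/search?city=San+Francisco"))
    print(rp.can_fetch("*", "http://www.musi-cal.com/"))

    # mtime() reports when robots.txt was last fetched; modified() resets
    # that timestamp to now, which long-running crawlers can use to decide
    # when to re-fetch the file.
    print(rp.mtime())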