summaryrefslogtreecommitdiffstats
path: root/Doc/ref
diff options
context:
space:
mode:
authorFred Drake <fdrake@acm.org>2001-08-14 21:43:31 (GMT)
committerFred Drake <fdrake@acm.org>2001-08-14 21:43:31 (GMT)
commitc0cf726d8c7224ec1e4c31b99b965314dadf7a16 (patch)
tree20e699a7344ba74fe98f410690ceaa2a2f718597 /Doc/ref
parent0cac5f697bd98c308df4162a2eccdd552c6e1772 (diff)
downloadcpython-c0cf726d8c7224ec1e4c31b99b965314dadf7a16.zip
cpython-c0cf726d8c7224ec1e4c31b99b965314dadf7a16.tar.gz
cpython-c0cf726d8c7224ec1e4c31b99b965314dadf7a16.tar.bz2
Add material about the "r" and "u" prefixes for string literals; should
be reviewed for clarity. Work around a bogosity in the HTML version of the escape sequences table conversion.
Diffstat (limited to 'Doc/ref')
-rw-r--r--Doc/ref/ref2.tex22
1 files changed, 14 insertions, 8 deletions
diff --git a/Doc/ref/ref2.tex b/Doc/ref/ref2.tex
index 0e4ea26..3ccfaef 100644
--- a/Doc/ref/ref2.tex
+++ b/Doc/ref/ref2.tex
@@ -311,7 +311,9 @@ String literals are described by the following lexical definitions:
\index{ASCII@\ASCII{}}
\begin{productionlist}
\production{stringliteral}
- {\token{shortstring} | \token{longstring}}
+ {[\token{stringprefix}](\token{shortstring} | \token{longstring})}
+ \production{stringprefix}
+ {"r" | "u" | "ur" | "R" | "U" | "UR" | "Ur" | "uR"}
\production{shortstring}
{"'" \token{shortstringitem}* "'"
| '"' \token{shortstringitem}* '"'}
@@ -325,11 +327,15 @@ String literals are described by the following lexical definitions:
\production{shortstringchar}
{<any ASCII character except "\e" or newline or the quote>}
\production{longstringchar}
- {<any ASCII character except "\e">}
+ {<any ASCII character except "\e">}
\production{escapeseq}
{"\e" <any ASCII character>}
\end{productionlist}
+One syntactic restriction not indicated by these productions is that
+whitespace is not allowed between the \grammartoken{stringprefix} and
+the rest of the string literal.
+
\index{triple-quoted string}
\index{Unicode Consortium}
\index{string!Unicode}
@@ -340,12 +346,14 @@ are generally referred to as \emph{triple-quoted strings}). The
backslash (\code{\e}) character is used to escape characters that
otherwise have a special meaning, such as newline, backslash itself,
or the quote character. String literals may optionally be prefixed
-with a letter `r' or `R'; such strings are called
-\dfn{raw strings}\index{raw string} and use different rules for
+with a letter `r' or `R'; such strings are called \dfn{raw
+strings}\index{raw string} and use different rules for interpreting
backslash escape sequences. A prefix of 'u' or 'U' makes the string
a Unicode string. Unicode strings use the Unicode character set as
defined by the Unicode Consortium and ISO~10646. Some additional
escape sequences, described below, are available in Unicode strings.
+The two prefix characters may be combined; in this case, `u' must
+appear before `r'.
In triple-quoted strings,
unescaped newlines and quotes are allowed (and are retained), except
@@ -374,10 +382,8 @@ to those used by Standard C. The recognized escape sequences are:
{Character named \var{name} in the Unicode database (Unicode only)}
\lineii{\e r} {\ASCII{} Carriage Return (CR)}
\lineii{\e t} {\ASCII{} Horizontal Tab (TAB)}
-\lineii{\e u\var{xxxx}}
- {Character with 16-bit hex value \var{xxxx} (Unicode only)}
-\lineii{\e U\var{xxxxxxxx}}
- {Character with 32-bit hex value \var{xxxxxxxx} (Unicode only)}
+\lineii{\e u\var{xxxx}} {Character with 16-bit hex value \var{xxxx} (Unicode only)}
+\lineii{\e U\var{xxxxxxxx}}{Character with 32-bit hex value \var{xxxxxxxx} (Unicode only)}
\lineii{\e v} {\ASCII{} Vertical Tab (VT)}
\lineii{\e\var{ooo}} {\ASCII{} character with octal value \var{ooo}}
\lineii{\e x\var{hh}} {\ASCII{} character with hex value \var{hh}}