summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Brian Quinlan <brian@sweetapp.com> 2011-04-07 22:30:41 (GMT)
committer: Brian Quinlan <brian@sweetapp.com> 2011-04-07 22:30:41 (GMT)
commit: d08b330a1538132737d9de646a53dd9dae581d57 (patch)
tree: 5cc75a8b44d3a742d75e7872631c3b9e7df211bf
parent: f007876bd64def2829a242e0cf5adfd3ef25c4be (diff)
parent: b1eb6602703d040b91e444a8764ea1ba8af69a21 (diff)
download: cpython-d08b330a1538132737d9de646a53dd9dae581d57.zip
cpython-d08b330a1538132737d9de646a53dd9dae581d57.tar.gz
cpython-d08b330a1538132737d9de646a53dd9dae581d57.tar.bz2
Merge to tip.
-rw-r--r-- Lib/html/parser.py 2
-rwxr-xr-x Lib/test/regrtest.py 2
-rw-r--r-- Lib/test/test_faulthandler.py 7
-rw-r--r-- Lib/test/test_htmlparser.py 17
-rw-r--r-- Misc/NEWS 2
-rw-r--r-- Modules/faulthandler.c 50
-rw-r--r-- setup.py 2
7 files changed, 61 insertions, 21 deletions
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index 21ebbc3..a3586eb 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -28,7 +28,7 @@ tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
# make it correctly strict without breaking backward compatibility.
attrfind = re.compile(
r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
- r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')
+ r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?')
attrfind_tolerant = re.compile(
r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
r'(\'[^\']*\'|"[^"]*"|[^>\s]*))?')
diff --git a/Lib/test/regrtest.py b/Lib/test/regrtest.py
index 704ad77..414aeff 100755
--- a/Lib/test/regrtest.py
+++ b/Lib/test/regrtest.py
@@ -240,7 +240,7 @@ def main(tests=None, testdir=None, verbose=0, quiet=False,
findleaks=False, use_resources=None, trace=False, coverdir='coverage',
runleaks=False, huntrleaks=False, verbose2=False, print_slow=False,
random_seed=None, use_mp=None, verbose3=False, forever=False,
- header=False, timeout=30*60):
+ header=False, timeout=60*60):
"""Execute a test suite.
This also parses command-line options and modifies its behavior
diff --git a/Lib/test/test_faulthandler.py b/Lib/test/test_faulthandler.py
index 59a0a6d..bfe662e 100644
--- a/Lib/test/test_faulthandler.py
+++ b/Lib/test/test_faulthandler.py
@@ -8,6 +8,12 @@ from test import support, script_helper
import tempfile
import unittest
+try:
+ import threading
+ HAVE_THREADS = True
+except ImportError:
+ HAVE_THREADS = False
+
TIMEOUT = 0.5
try:
@@ -279,6 +285,7 @@ funcA()
with temporary_filename() as filename:
self.check_dump_traceback(filename)
+ @unittest.skipIf(not HAVE_THREADS, 'need threads')
def check_dump_traceback_threads(self, filename):
"""
Call explicitly dump_traceback(all_threads=True) and check the output.
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index 5ecd016..637ab01 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -217,6 +217,23 @@ DOCTYPE html [
("starttag", "a", [("href", "mailto:xyz@example.com")]),
])
+ def test_attr_nonascii(self):
+ # see issue 7311
+ self._run_check("<img src=/foo/bar.png alt=\u4e2d\u6587>", [
+ ("starttag", "img", [("src", "/foo/bar.png"),
+ ("alt", "\u4e2d\u6587")]),
+ ])
+ self._run_check("<a title='\u30c6\u30b9\u30c8' "
+ "href='\u30c6\u30b9\u30c8.html'>", [
+ ("starttag", "a", [("title", "\u30c6\u30b9\u30c8"),
+ ("href", "\u30c6\u30b9\u30c8.html")]),
+ ])
+ self._run_check('<a title="\u30c6\u30b9\u30c8" '
+ 'href="\u30c6\u30b9\u30c8.html">', [
+ ("starttag", "a", [("title", "\u30c6\u30b9\u30c8"),
+ ("href", "\u30c6\u30b9\u30c8.html")]),
+ ])
+
def test_attr_entity_replacement(self):
self._run_check("""<a b='&amp;&gt;&lt;&quot;&apos;'>""", [
("starttag", "a", [("b", "&><\"'")]),
diff --git a/Misc/NEWS b/Misc/NEWS
index 37eb250..fa03fc1 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -94,6 +94,8 @@ Core and Builtins
Library
-------
+- Issue #7311: fix html.parser to accept non-ASCII attribute values.
+
- Issue #11605: email.parser.BytesFeedParser was incorrectly converting multipart
subparts with an 8bit CTE into unicode instead of preserving the bytes.
diff --git a/Modules/faulthandler.c b/Modules/faulthandler.c
index abc12a0..76cadf3 100644
--- a/Modules/faulthandler.c
+++ b/Modules/faulthandler.c
@@ -5,6 +5,9 @@
#include <frameobject.h>
#include <signal.h>
+/* Allocate at most 100 MB of stack space when provoking a stack overflow */
+#define STACK_OVERFLOW_MAX_SIZE (100*1024*1024)
+
#ifdef WITH_THREAD
# define FAULTHANDLER_LATER
#endif
@@ -16,9 +19,6 @@
# define FAULTHANDLER_USER
#endif
-/* Allocate at maximum 100 MB of the stack to raise the stack overflow */
-#define STACK_OVERFLOW_MAX_SIZE (100*1024*1024)
-
#define PUTS(fd, str) write(fd, str, strlen(str))
#ifdef HAVE_SIGACTION
@@ -218,12 +218,7 @@ faulthandler_dump_traceback_py(PyObject *self,
This function is signal safe and should only call signal safe functions. */
static void
-faulthandler_fatal_error(
- int signum
-#ifdef HAVE_SIGACTION
- , siginfo_t *siginfo, void *ucontext
-#endif
-)
+faulthandler_fatal_error(int signum)
{
const int fd = fatal_error.fd;
unsigned int i;
@@ -255,6 +250,7 @@ faulthandler_fatal_error(
PUTS(fd, handler->name);
PUTS(fd, "\n\n");
+#ifdef WITH_THREAD
/* SIGSEGV, SIGFPE, SIGABRT, SIGBUS and SIGILL are synchronous signals and
so are delivered to the thread that caused the fault. Get the Python
thread state of the current thread.
@@ -264,6 +260,9 @@ faulthandler_fatal_error(
used. Read the thread local storage (TLS) instead: call
PyGILState_GetThisThreadState(). */
tstate = PyGILState_GetThisThreadState();
+#else
+ tstate = PyThreadState_Get();
+#endif
if (tstate == NULL)
return;
@@ -320,7 +319,7 @@ faulthandler_enable(PyObject *self, PyObject *args, PyObject *kwargs)
for (i=0; i < faulthandler_nsignals; i++) {
handler = &faulthandler_handlers[i];
#ifdef HAVE_SIGACTION
- action.sa_sigaction = faulthandler_fatal_error;
+ action.sa_handler = faulthandler_fatal_error;
sigemptyset(&action.sa_mask);
/* Do not prevent the signal from being received from within
its own signal handler */
@@ -451,8 +450,8 @@ faulthandler_cancel_dump_tracebacks_later(void)
}
static PyObject*
-faulthandler_dump_traceback_later(PyObject *self,
- PyObject *args, PyObject *kwargs)
+faulthandler_dump_tracebacks_later(PyObject *self,
+ PyObject *args, PyObject *kwargs)
{
static char *kwlist[] = {"timeout", "repeat", "file", "exit", NULL};
double timeout;
@@ -461,6 +460,7 @@ faulthandler_dump_traceback_later(PyObject *self,
PyObject *file = NULL;
int fd;
int exit = 0;
+ PyThreadState *tstate;
if (!PyArg_ParseTupleAndKeywords(args, kwargs,
"d|iOi:dump_tracebacks_later", kwlist,
@@ -477,6 +477,13 @@ faulthandler_dump_traceback_later(PyObject *self,
return NULL;
}
+ tstate = PyThreadState_Get();
+ if (tstate == NULL) {
+ PyErr_SetString(PyExc_RuntimeError,
+ "unable to get the current thread state");
+ return NULL;
+ }
+
file = faulthandler_get_fileno(file, &fd);
if (file == NULL)
return NULL;
@@ -490,7 +497,7 @@ faulthandler_dump_traceback_later(PyObject *self,
thread.fd = fd;
thread.timeout_ms = timeout_ms;
thread.repeat = repeat;
- thread.interp = PyThreadState_Get()->interp;
+ thread.interp = tstate->interp;
thread.exit = exit;
/* Arm these locks to serve as events when released */
@@ -537,10 +544,14 @@ faulthandler_user(int signum)
if (!user->enabled)
return;
+#ifdef WITH_THREAD
/* PyThreadState_Get() doesn't give the state of the current thread if
the thread doesn't hold the GIL. Read the thread local storage (TLS)
instead: call PyGILState_GetThisThreadState(). */
tstate = PyGILState_GetThisThreadState();
+#else
+ tstate = PyThreadState_Get();
+#endif
if (user->all_threads)
_Py_DumpTracebackThreads(user->fd, user->interp, tstate);
@@ -826,7 +837,7 @@ static int
faulthandler_traverse(PyObject *module, visitproc visit, void *arg)
{
#ifdef FAULTHANDLER_USER
- unsigned int index;
+ unsigned int signum;
#endif
#ifdef FAULTHANDLER_LATER
@@ -834,8 +845,8 @@ faulthandler_traverse(PyObject *module, visitproc visit, void *arg)
#endif
#ifdef FAULTHANDLER_USER
if (user_signals != NULL) {
- for (index=0; index < NSIG; index++)
- Py_VISIT(user_signals[index].file);
+ for (signum=0; signum < NSIG; signum++)
+ Py_VISIT(user_signals[signum].file);
}
#endif
Py_VISIT(fatal_error.file);
@@ -861,10 +872,11 @@ static PyMethodDef module_methods[] = {
"if all_threads is True, into file")},
#ifdef FAULTHANDLER_LATER
{"dump_tracebacks_later",
- (PyCFunction)faulthandler_dump_traceback_later, METH_VARARGS|METH_KEYWORDS,
- PyDoc_STR("dump_tracebacks_later(timeout, repeat=False, file=sys.stderr):\n"
+ (PyCFunction)faulthandler_dump_tracebacks_later, METH_VARARGS|METH_KEYWORDS,
+ PyDoc_STR("dump_tracebacks_later(timeout, repeat=False, file=sys.stderr, exit=False):\n"
"dump the traceback of all threads in timeout seconds,\n"
- "or each timeout seconds if repeat is True.")},
+ "or each timeout seconds if repeat is True. If exit is True, "
+ "call _exit(1) which is not safe.")},
{"cancel_dump_tracebacks_later",
(PyCFunction)faulthandler_cancel_dump_tracebacks_later_py, METH_NOARGS,
PyDoc_STR("cancel_dump_tracebacks_later():\ncancel the previous call "
diff --git a/setup.py b/setup.py
index d37bcd0..cbaf1ab 100644
--- a/setup.py
+++ b/setup.py
@@ -373,6 +373,8 @@ class PyBuildExt(build_ext):
def add_multiarch_paths(self):
# Debian/Ubuntu multiarch support.
# https://wiki.ubuntu.com/MultiarchSpec
+ if not find_executable('dpkg-architecture'):
+ return
tmpfile = os.path.join(self.build_temp, 'multiarch')
if not os.path.exists(self.build_temp):
os.makedirs(self.build_temp)