summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAntoine Pitrou <solipsis@pitrou.net>2010-10-31 15:26:04 (GMT)
committerAntoine Pitrou <solipsis@pitrou.net>2010-10-31 15:26:04 (GMT)
commite974571d36009e327a97bb83389cf05c2b858288 (patch)
tree2f4985227e462222cdb88331aa6aa04c29d71a00
parent65b4af34d706206f45f0a4fb5768548b176f977e (diff)
downloadcpython-e974571d36009e327a97bb83389cf05c2b858288.zip
cpython-e974571d36009e327a97bb83389cf05c2b858288.tar.gz
cpython-e974571d36009e327a97bb83389cf05c2b858288.tar.bz2
Issue #10160: Speed up operator.attrgetter. Patch by Christos Georgiou.
-rw-r--r--Doc/library/operator.rst2
-rw-r--r--Lib/test/test_operator.py9
-rw-r--r--Misc/NEWS2
-rw-r--r--Modules/operator.c133
4 files changed, 114 insertions, 32 deletions
diff --git a/Doc/library/operator.rst b/Doc/library/operator.rst
index 2d03061..efefaa6 100644
--- a/Doc/library/operator.rst
+++ b/Doc/library/operator.rst
@@ -336,6 +336,8 @@ expect a function argument.
b.date)``. Equivalent to::
def attrgetter(*items):
+ if any(not isinstance(item, str) for item in items):
+ raise TypeError('attribute name must be a string')
if len(items) == 1:
attr = items[0]
def g(obj):
diff --git a/Lib/test/test_operator.py b/Lib/test/test_operator.py
index aa3219f..f291e52 100644
--- a/Lib/test/test_operator.py
+++ b/Lib/test/test_operator.py
@@ -275,8 +275,7 @@ class OperatorTestCase(unittest.TestCase):
self.assertEqual(f(a), 'arthur')
f = operator.attrgetter('rank')
self.assertRaises(AttributeError, f, a)
- f = operator.attrgetter(2)
- self.assertRaises(TypeError, f, a)
+ self.assertRaises(TypeError, operator.attrgetter, 2)
self.assertRaises(TypeError, operator.attrgetter)
# multiple gets
@@ -285,7 +284,7 @@ class OperatorTestCase(unittest.TestCase):
record.y = 'Y'
record.z = 'Z'
self.assertEqual(operator.attrgetter('x','z','y')(record), ('X', 'Z', 'Y'))
- self.assertRaises(TypeError, operator.attrgetter('x', (), 'y'), record)
+ self.assertRaises(TypeError, operator.attrgetter, ('x', (), 'y'))
class C(object):
def __getattr__(self, name):
@@ -304,6 +303,10 @@ class OperatorTestCase(unittest.TestCase):
self.assertEqual(f(a), ('arthur', 'thomas'))
f = operator.attrgetter('name', 'child.name', 'child.child.name')
self.assertRaises(AttributeError, f, a)
+ f = operator.attrgetter('child.')
+ self.assertRaises(AttributeError, f, a)
+ f = operator.attrgetter('.child')
+ self.assertRaises(AttributeError, f, a)
a.child.child = A()
a.child.child.name = 'johnson'
diff --git a/Misc/NEWS b/Misc/NEWS
index fc8c648..ca66be0 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -59,6 +59,8 @@ Core and Builtins
Library
-------
+- Issue #10160: Speed up operator.attrgetter. Patch by Christos Georgiou.
+
- logging: Added style option to basicConfig() to allow %, {} or $-formatting.
- Issue #5729: json.dumps() now supports using a string such as '\t'
diff --git a/Modules/operator.c b/Modules/operator.c
index 8ca4d2d..48bdae0 100644
--- a/Modules/operator.c
+++ b/Modules/operator.c
@@ -383,7 +383,7 @@ attrgetter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
attrgetterobject *ag;
PyObject *attr;
- Py_ssize_t nattrs;
+ Py_ssize_t nattrs, idx, char_idx;
if (!_PyArg_NoKeywords("attrgetter()", kwds))
return NULL;
@@ -392,15 +392,92 @@ attrgetter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
if (nattrs <= 1) {
if (!PyArg_UnpackTuple(args, "attrgetter", 1, 1, &attr))
return NULL;
- } else
- attr = args;
+ }
+
+ attr = PyTuple_New(nattrs);
+ if (attr == NULL)
+ return NULL;
+
+ /* prepare attr while checking args */
+ for (idx = 0; idx < nattrs; ++idx) {
+ PyObject *item = PyTuple_GET_ITEM(args, idx);
+ Py_ssize_t item_len;
+ Py_UNICODE *item_buffer;
+ int dot_count;
+
+ if (!PyUnicode_Check(item)) {
+ PyErr_SetString(PyExc_TypeError,
+ "attribute name must be a string");
+ Py_DECREF(attr);
+ return NULL;
+ }
+ item_len = PyUnicode_GET_SIZE(item);
+ item_buffer = PyUnicode_AS_UNICODE(item);
+
+ /* check whethere the string is dotted */
+ dot_count = 0;
+ for (char_idx = 0; char_idx < item_len; ++char_idx) {
+ if (item_buffer[char_idx] == (Py_UNICODE)'.')
+ ++dot_count;
+ }
+
+ if (dot_count == 0) {
+ Py_INCREF(item);
+ PyUnicode_InternInPlace(&item);
+ PyTuple_SET_ITEM(attr, idx, item);
+ } else { /* make it a tuple of non-dotted attrnames */
+ PyObject *attr_chain = PyTuple_New(dot_count + 1);
+ PyObject *attr_chain_item;
+
+ if (attr_chain == NULL) {
+ Py_DECREF(attr);
+ return NULL;
+ }
+
+ Py_ssize_t unibuff_from = 0;
+ Py_ssize_t unibuff_till = 0;
+ Py_ssize_t attr_chain_idx = 0;
+ for (; dot_count > 0; --dot_count) {
+ while (item_buffer[unibuff_till] != (Py_UNICODE)'.') {
+ ++unibuff_till;
+ }
+ attr_chain_item = PyUnicode_FromUnicode(
+ item_buffer + unibuff_from,
+ unibuff_till - unibuff_from);
+ if (attr_chain_item == NULL) {
+ Py_DECREF(attr_chain);
+ Py_DECREF(attr);
+ return NULL;
+ }
+ PyUnicode_InternInPlace(&attr_chain_item);
+ PyTuple_SET_ITEM(attr_chain, attr_chain_idx, attr_chain_item);
+ ++attr_chain_idx;
+ unibuff_till = unibuff_from = unibuff_till + 1;
+ }
+
+ /* now add the last dotless name */
+ attr_chain_item = PyUnicode_FromUnicode(
+ item_buffer + unibuff_from,
+ item_len - unibuff_from);
+ if (attr_chain_item == NULL) {
+ Py_DECREF(attr_chain);
+ Py_DECREF(attr);
+ return NULL;
+ }
+ PyUnicode_InternInPlace(&attr_chain_item);
+ PyTuple_SET_ITEM(attr_chain, attr_chain_idx, attr_chain_item);
+
+ PyTuple_SET_ITEM(attr, idx, attr_chain);
+ }
+ }
/* create attrgetterobject structure */
ag = PyObject_GC_New(attrgetterobject, &attrgetter_type);
- if (ag == NULL)
+ if (ag == NULL) {
+ Py_DECREF(attr);
return NULL;
+ }
- Py_INCREF(attr);
ag->attr = attr;
ag->nattrs = nattrs;
@@ -426,33 +503,31 @@ attrgetter_traverse(attrgetterobject *ag, visitproc visit, void *arg)
static PyObject *
dotted_getattr(PyObject *obj, PyObject *attr)
{
- char *s, *p;
-
- if (!PyUnicode_Check(attr)) {
- PyErr_SetString(PyExc_TypeError,
- "attribute name must be a string");
- return NULL;
- }
-
- s = _PyUnicode_AsString(attr);
- Py_INCREF(obj);
- for (;;) {
- PyObject *newobj, *str;
- p = strchr(s, '.');
- str = p ? PyUnicode_FromStringAndSize(s, (p-s)) :
- PyUnicode_FromString(s);
- if (str == NULL) {
+ PyObject *newobj;
+
+ /* attr is either a tuple or instance of str.
+ Ensured by the setup code of attrgetter_new */
+ if (PyTuple_CheckExact(attr)) { /* chained getattr */
+ Py_ssize_t name_idx = 0, name_count;
+ PyObject *attr_name;
+
+ name_count = PyTuple_GET_SIZE(attr);
+ Py_INCREF(obj);
+ for (name_idx = 0; name_idx < name_count; ++name_idx) {
+ attr_name = PyTuple_GET_ITEM(attr, name_idx);
+ newobj = PyObject_GetAttr(obj, attr_name);
Py_DECREF(obj);
- return NULL;
+ if (newobj == NULL) {
+ return NULL;
+ }
+ /* here */
+ obj = newobj;
}
- newobj = PyObject_GetAttr(obj, str);
- Py_DECREF(str);
- Py_DECREF(obj);
+ } else { /* single getattr */
+ newobj = PyObject_GetAttr(obj, attr);
if (newobj == NULL)
return NULL;
obj = newobj;
- if (p == NULL) break;
- s = p+1;
}
return obj;
@@ -466,8 +541,8 @@ attrgetter_call(attrgetterobject *ag, PyObject *args, PyObject *kw)
if (!PyArg_UnpackTuple(args, "attrgetter", 1, 1, &obj))
return NULL;
- if (ag->nattrs == 1)
- return dotted_getattr(obj, ag->attr);
+ if (ag->nattrs == 1) /* ag->attr is always a tuple */
+ return dotted_getattr(obj, PyTuple_GET_ITEM(ag->attr, 0));
assert(PyTuple_Check(ag->attr));
assert(PyTuple_GET_SIZE(ag->attr) == nattrs);