diff options
author | Serhiy Storchaka <storchaka@gmail.com> | 2024-01-22 13:34:16 (GMT) |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-22 13:34:16 (GMT) |
commit | c8351a617b8970dbe0d3af721c6aea873019c466 (patch) | |
tree | 3d38887fb1dada30112ecd0f7c4b41245ff00dab | |
parent | 2f2ddabd1a02e3095b751100b94b529e4e0bcd20 (diff) | |
download | cpython-c8351a617b8970dbe0d3af721c6aea873019c466.zip cpython-c8351a617b8970dbe0d3af721c6aea873019c466.tar.gz cpython-c8351a617b8970dbe0d3af721c6aea873019c466.tar.bz2 |
gh-113796: Add more validation checks in the csv.Dialect constructor (GH-113797)
ValueError is now raised if the same character is used in different roles.
-rw-r--r-- | Lib/test/test_csv.py | 67 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Library/2024-01-07-21-04-24.gh-issue-113796.6iNsCR.rst | 3 | ||||
-rw-r--r-- | Modules/_csv.c | 39 |
3 files changed, 96 insertions, 13 deletions
diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 36da86e..69fef59 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -28,14 +28,20 @@ class Test_Csv(unittest.TestCase): in TestDialectRegistry. """ def _test_arg_valid(self, ctor, arg): + ctor(arg) self.assertRaises(TypeError, ctor) self.assertRaises(TypeError, ctor, None) - self.assertRaises(TypeError, ctor, arg, bad_attr = 0) - self.assertRaises(TypeError, ctor, arg, delimiter = 0) - self.assertRaises(TypeError, ctor, arg, delimiter = 'XX') + self.assertRaises(TypeError, ctor, arg, bad_attr=0) + self.assertRaises(TypeError, ctor, arg, delimiter='') + self.assertRaises(TypeError, ctor, arg, escapechar='') + self.assertRaises(TypeError, ctor, arg, quotechar='') + self.assertRaises(TypeError, ctor, arg, delimiter='^^') + self.assertRaises(TypeError, ctor, arg, escapechar='^^') + self.assertRaises(TypeError, ctor, arg, quotechar='^^') self.assertRaises(csv.Error, ctor, arg, 'foo') self.assertRaises(TypeError, ctor, arg, delimiter=None) self.assertRaises(TypeError, ctor, arg, delimiter=1) + self.assertRaises(TypeError, ctor, arg, escapechar=1) self.assertRaises(TypeError, ctor, arg, quotechar=1) self.assertRaises(TypeError, ctor, arg, lineterminator=None) self.assertRaises(TypeError, ctor, arg, lineterminator=1) @@ -46,6 +52,40 @@ class Test_Csv(unittest.TestCase): quoting=csv.QUOTE_ALL, quotechar=None) self.assertRaises(TypeError, ctor, arg, quoting=csv.QUOTE_NONE, quotechar='') + self.assertRaises(ValueError, ctor, arg, delimiter='\n') + self.assertRaises(ValueError, ctor, arg, escapechar='\n') + self.assertRaises(ValueError, ctor, arg, quotechar='\n') + self.assertRaises(ValueError, ctor, arg, delimiter='\r') + self.assertRaises(ValueError, ctor, arg, escapechar='\r') + self.assertRaises(ValueError, ctor, arg, quotechar='\r') + ctor(arg, delimiter=' ') + ctor(arg, escapechar=' ') + ctor(arg, quotechar=' ') + ctor(arg, delimiter='\t', skipinitialspace=True) + ctor(arg, escapechar='\t', skipinitialspace=True) + ctor(arg, quotechar='\t', skipinitialspace=True) + self.assertRaises(ValueError, ctor, arg, + delimiter=' ', skipinitialspace=True) + self.assertRaises(ValueError, ctor, arg, + escapechar=' ', skipinitialspace=True) + self.assertRaises(ValueError, ctor, arg, + quotechar=' ', skipinitialspace=True) + ctor(arg, delimiter='^') + ctor(arg, escapechar='^') + ctor(arg, quotechar='^') + self.assertRaises(ValueError, ctor, arg, delimiter='^', escapechar='^') + self.assertRaises(ValueError, ctor, arg, delimiter='^', quotechar='^') + self.assertRaises(ValueError, ctor, arg, escapechar='^', quotechar='^') + ctor(arg, delimiter='\x85') + ctor(arg, escapechar='\x85') + ctor(arg, quotechar='\x85') + ctor(arg, lineterminator='\x85') + self.assertRaises(ValueError, ctor, arg, + delimiter='\x85', lineterminator='\x85') + self.assertRaises(ValueError, ctor, arg, + escapechar='\x85', lineterminator='\x85') + self.assertRaises(ValueError, ctor, arg, + quotechar='\x85', lineterminator='\x85') def test_reader_arg_valid(self): self._test_arg_valid(csv.reader, []) @@ -535,14 +575,6 @@ class TestDialectRegistry(unittest.TestCase): finally: csv.unregister_dialect('testC') - def test_bad_dialect(self): - # Unknown parameter - self.assertRaises(TypeError, csv.reader, [], bad_attr = 0) - # Bad values - self.assertRaises(TypeError, csv.reader, [], delimiter = None) - self.assertRaises(TypeError, csv.reader, [], quoting = -1) - self.assertRaises(TypeError, csv.reader, [], quoting = 100) - def test_copy(self): for name in csv.list_dialects(): dialect = csv.get_dialect(name) @@ -1088,10 +1120,15 @@ class TestDialectValidity(unittest.TestCase): '"lineterminator" must be a string') def test_invalid_chars(self): - def create_invalid(field_name, value): + def create_invalid(field_name, value, **kwargs): class mydialect(csv.Dialect): - pass + delimiter = ',' + quoting = csv.QUOTE_ALL + quotechar = '"' + lineterminator = '\r\n' setattr(mydialect, field_name, value) + for field_name, value in kwargs.items(): + setattr(mydialect, field_name, value) d = mydialect() for field_name in ("delimiter", "escapechar", "quotechar"): @@ -1100,6 +1137,10 @@ class TestDialectValidity(unittest.TestCase): self.assertRaises(csv.Error, create_invalid, field_name, "abc") self.assertRaises(csv.Error, create_invalid, field_name, b'x') self.assertRaises(csv.Error, create_invalid, field_name, 5) + self.assertRaises(ValueError, create_invalid, field_name, "\n") + self.assertRaises(ValueError, create_invalid, field_name, "\r") + self.assertRaises(ValueError, create_invalid, field_name, " ", + skipinitialspace=True) class TestSniffer(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2024-01-07-21-04-24.gh-issue-113796.6iNsCR.rst b/Misc/NEWS.d/next/Library/2024-01-07-21-04-24.gh-issue-113796.6iNsCR.rst new file mode 100644 index 0000000..e9d4aba --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-07-21-04-24.gh-issue-113796.6iNsCR.rst @@ -0,0 +1,3 @@ +Add more validation checks in the :class:`csv.Dialect` constructor. +:exc:`ValueError` is now raised if the same character is used in different +roles. diff --git a/Modules/_csv.c b/Modules/_csv.c index 8d94156..929c215 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -331,6 +331,33 @@ dialect_check_quoting(int quoting) return -1; } +static int +dialect_check_char(const char *name, Py_UCS4 c, DialectObj *dialect) +{ + if (c == '\r' || c == '\n' || (dialect->skipinitialspace && c == ' ')) { + PyErr_Format(PyExc_ValueError, "bad %s value", name); + return -1; + } + if (PyUnicode_FindChar( + dialect->lineterminator, c, 0, + PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) + { + PyErr_Format(PyExc_ValueError, "bad %s or lineterminator value", name); + return -1; + } + return 0; +} + + static int +dialect_check_chars(const char *name1, const char *name2, Py_UCS4 c1, Py_UCS4 c2) +{ + if (c1 == c2 && c1 != NOT_SET) { + PyErr_Format(PyExc_ValueError, "bad %s or %s value", name1, name2); + return -1; + } + return 0; +} + #define D_OFF(x) offsetof(DialectObj, x) static struct PyMemberDef Dialect_memberlist[] = { @@ -508,6 +535,18 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyErr_SetString(PyExc_TypeError, "lineterminator must be set"); goto err; } + if (dialect_check_char("delimiter", self->delimiter, self) || + dialect_check_char("escapechar", self->escapechar, self) || + dialect_check_char("quotechar", self->quotechar, self) || + dialect_check_chars("delimiter", "escapechar", + self->delimiter, self->escapechar) || + dialect_check_chars("delimiter", "quotechar", + self->delimiter, self->quotechar) || + dialect_check_chars("escapechar", "quotechar", + self->escapechar, self->quotechar)) + { + goto err; + } ret = Py_NewRef(self); err: |