From 9a5499b4e54f1d74dfe41772d780511c8ad1120c Mon Sep 17 00:00:00 2001 From: Amaury Forgeot d'Arc Date: Tue, 11 Nov 2008 23:04:59 +0000 Subject: #3705: Command-line arguments were not correctly decoded when the terminal does not use UTF8. Now the code propagates the unicode string as far as possible, and avoids the conversion to char* which implicitly uses utf-8. Reviewed by Benjamin. --- Lib/test/test_cmd_line.py | 6 ++++++ Misc/NEWS | 3 +++ Modules/main.c | 40 +++++++++++++++++++--------------------- Python/import.c | 11 +++++++---- 4 files changed, 35 insertions(+), 25 deletions(-) diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index d63dfa1..fc0e3a7 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -135,6 +135,12 @@ class CmdLineTest(unittest.TestCase): self.exit_code('-c', 'pass'), 0) + # Test handling of non-ascii data + command = "assert(ord('\xe9') == 0xe9)" + self.assertEqual( + self.exit_code('-c', command), + 0) + def test_main(): test.support.run_unittest(CmdLineTest) diff --git a/Misc/NEWS b/Misc/NEWS index 3a8f467..24afb0e 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -13,6 +13,9 @@ What's New in Python 3.0 release candiate 3? Core and Builtins ----------------- +- Issue #3705: Command-line arguments were not correctly decoded when the + terminal does not use UTF8. 
+ Library ------- diff --git a/Modules/main.c b/Modules/main.c index 6fdc33a..78913ee 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -287,7 +287,7 @@ Py_Main(int argc, wchar_t **argv) { int c; int sts; - char *command = NULL; + wchar_t *command = NULL; wchar_t *filename = NULL; wchar_t *module = NULL; FILE *fp = stdin; @@ -299,7 +299,6 @@ Py_Main(int argc, wchar_t **argv) int version = 0; int saw_unbuffered_flag = 0; PyCompilerFlags cf; - char *oldloc; cf.cf_flags = 0; @@ -310,30 +309,19 @@ Py_Main(int argc, wchar_t **argv) while ((c = _PyOS_GetOpt(argc, argv, PROGRAM_OPTS)) != EOF) { if (c == 'c') { - size_t r1, r2; - oldloc = setlocale(LC_ALL, NULL); - setlocale(LC_ALL, ""); - r1 = wcslen(_PyOS_optarg); - r2 = wcstombs(NULL, _PyOS_optarg, r1); - if (r2 == (size_t) -1) - Py_FatalError( - "cannot convert character encoding of -c argument"); - if (r2 > r1) - r1 = r2; - r1 += 2; + size_t len; /* -c is the last option; following arguments that look like options are left for the command to interpret. 
*/ - command = (char *)malloc(r1); + + len = wcslen(_PyOS_optarg) + 1 + 1; + command = (wchar_t *)malloc(sizeof(wchar_t) * len); if (command == NULL) Py_FatalError( "not enough memory to copy -c argument"); - r2 = wcstombs(command, _PyOS_optarg, r1); - if (r2 > r1-1) - Py_FatalError( - "not enough memory to copy -c argument"); - strcat(command, "\n"); - setlocale(LC_ALL, oldloc); + wcscpy(command, _PyOS_optarg); + command[len - 2] = '\n'; + command[len - 1] = 0; break; } @@ -543,8 +531,18 @@ Py_Main(int argc, wchar_t **argv) } if (command) { - sts = PyRun_SimpleStringFlags(command, &cf) != 0; + PyObject *commandObj = PyUnicode_FromWideChar( + command, wcslen(command)); free(command); + if (commandObj != NULL) { + sts = PyRun_SimpleStringFlags( + _PyUnicode_AsString(commandObj), &cf) != 0; + } + else { + PyErr_Print(); + sts = 1; + } + Py_DECREF(commandObj); } else if (module) { sts = RunModule(module, 1); } diff --git a/Python/import.c b/Python/import.c index 564ace88..2bad2e5 100644 --- a/Python/import.c +++ b/Python/import.c @@ -2793,6 +2793,7 @@ call_find_module(char *name, PyObject *path) { extern int fclose(FILE *); PyObject *fob, *ret; + PyObject *pathobj; struct filedescr *fdp; char pathname[MAXPATHLEN+1]; FILE *fp = NULL; @@ -2836,9 +2837,9 @@ call_find_module(char *name, PyObject *path) fob = Py_None; Py_INCREF(fob); } - ret = Py_BuildValue("Os(ssi)", - fob, pathname, fdp->suffix, fdp->mode, fdp->type); - Py_DECREF(fob); + pathobj = PyUnicode_DecodeFSDefault(pathname); + ret = Py_BuildValue("NN(ssi)", + fob, pathobj, fdp->suffix, fdp->mode, fdp->type); PyMem_FREE(found_encoding); return ret; @@ -2849,7 +2850,9 @@ imp_find_module(PyObject *self, PyObject *args) { char *name; PyObject *path = NULL; - if (!PyArg_ParseTuple(args, "s|O:find_module", &name, &path)) + if (!PyArg_ParseTuple(args, "es|O:find_module", + Py_FileSystemDefaultEncoding, &name, + &path)) return NULL; return call_find_module(name, path); } -- cgit v0.12