summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAmaury Forgeot d'Arc <amauryfa@gmail.com>2008-11-11 23:04:59 (GMT)
committerAmaury Forgeot d'Arc <amauryfa@gmail.com>2008-11-11 23:04:59 (GMT)
commit9a5499b4e54f1d74dfe41772d780511c8ad1120c (patch)
tree4aac685d27ffba930b8c36f4dc356aed6728a0b3
parentd3013ffa49e889ff96ed967f574aa01b91b09f12 (diff)
downloadcpython-9a5499b4e54f1d74dfe41772d780511c8ad1120c.zip
cpython-9a5499b4e54f1d74dfe41772d780511c8ad1120c.tar.gz
cpython-9a5499b4e54f1d74dfe41772d780511c8ad1120c.tar.bz2
#3705: Command-line arguments were not correctly decoded when the
terminal does not use UTF-8. Now the code propagates the Unicode string as far as possible, and avoids the conversion to char*, which implicitly uses UTF-8. Reviewed by Benjamin.
-rw-r--r--Lib/test/test_cmd_line.py6
-rw-r--r--Misc/NEWS3
-rw-r--r--Modules/main.c40
-rw-r--r--Python/import.c11
4 files changed, 35 insertions, 25 deletions
diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py
index d63dfa1..fc0e3a7 100644
--- a/Lib/test/test_cmd_line.py
+++ b/Lib/test/test_cmd_line.py
@@ -135,6 +135,12 @@ class CmdLineTest(unittest.TestCase):
self.exit_code('-c', 'pass'),
0)
+ # Test handling of non-ascii data
+ command = "assert(ord('\xe9') == 0xe9)"
+ self.assertEqual(
+ self.exit_code('-c', command),
+ 0)
+
def test_main():
test.support.run_unittest(CmdLineTest)
diff --git a/Misc/NEWS b/Misc/NEWS
index 3a8f467..24afb0e 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -13,6 +13,9 @@ What's New in Python 3.0 release candiate 3?
Core and Builtins
-----------------
+- Issue #3705: Command-line arguments were not correctly decoded when the
+ terminal does not use UTF8.
+
Library
-------
diff --git a/Modules/main.c b/Modules/main.c
index 6fdc33a..78913ee 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -287,7 +287,7 @@ Py_Main(int argc, wchar_t **argv)
{
int c;
int sts;
- char *command = NULL;
+ wchar_t *command = NULL;
wchar_t *filename = NULL;
wchar_t *module = NULL;
FILE *fp = stdin;
@@ -299,7 +299,6 @@ Py_Main(int argc, wchar_t **argv)
int version = 0;
int saw_unbuffered_flag = 0;
PyCompilerFlags cf;
- char *oldloc;
cf.cf_flags = 0;
@@ -310,30 +309,19 @@ Py_Main(int argc, wchar_t **argv)
while ((c = _PyOS_GetOpt(argc, argv, PROGRAM_OPTS)) != EOF) {
if (c == 'c') {
- size_t r1, r2;
- oldloc = setlocale(LC_ALL, NULL);
- setlocale(LC_ALL, "");
- r1 = wcslen(_PyOS_optarg);
- r2 = wcstombs(NULL, _PyOS_optarg, r1);
- if (r2 == (size_t) -1)
- Py_FatalError(
- "cannot convert character encoding of -c argument");
- if (r2 > r1)
- r1 = r2;
- r1 += 2;
+ size_t len;
/* -c is the last option; following arguments
that look like options are left for the
command to interpret. */
- command = (char *)malloc(r1);
+
+ len = wcslen(_PyOS_optarg) + 1 + 1;
+ command = (wchar_t *)malloc(sizeof(wchar_t) * len);
if (command == NULL)
Py_FatalError(
"not enough memory to copy -c argument");
- r2 = wcstombs(command, _PyOS_optarg, r1);
- if (r2 > r1-1)
- Py_FatalError(
- "not enough memory to copy -c argument");
- strcat(command, "\n");
- setlocale(LC_ALL, oldloc);
+ wcscpy(command, _PyOS_optarg);
+ command[len - 2] = '\n';
+ command[len - 1] = 0;
break;
}
@@ -543,8 +531,18 @@ Py_Main(int argc, wchar_t **argv)
}
if (command) {
- sts = PyRun_SimpleStringFlags(command, &cf) != 0;
+ PyObject *commandObj = PyUnicode_FromWideChar(
+ command, wcslen(command));
free(command);
+ if (commandObj != NULL) {
+ sts = PyRun_SimpleStringFlags(
+ _PyUnicode_AsString(commandObj), &cf) != 0;
+ }
+ else {
+ PyErr_Print();
+ sts = 1;
+ }
+ Py_DECREF(commandObj);
} else if (module) {
sts = RunModule(module, 1);
}
diff --git a/Python/import.c b/Python/import.c
index 564ace88..2bad2e5 100644
--- a/Python/import.c
+++ b/Python/import.c
@@ -2793,6 +2793,7 @@ call_find_module(char *name, PyObject *path)
{
extern int fclose(FILE *);
PyObject *fob, *ret;
+ PyObject *pathobj;
struct filedescr *fdp;
char pathname[MAXPATHLEN+1];
FILE *fp = NULL;
@@ -2836,9 +2837,9 @@ call_find_module(char *name, PyObject *path)
fob = Py_None;
Py_INCREF(fob);
}
- ret = Py_BuildValue("Os(ssi)",
- fob, pathname, fdp->suffix, fdp->mode, fdp->type);
- Py_DECREF(fob);
+ pathobj = PyUnicode_DecodeFSDefault(pathname);
+ ret = Py_BuildValue("NN(ssi)",
+ fob, pathobj, fdp->suffix, fdp->mode, fdp->type);
PyMem_FREE(found_encoding);
return ret;
@@ -2849,7 +2850,9 @@ imp_find_module(PyObject *self, PyObject *args)
{
char *name;
PyObject *path = NULL;
- if (!PyArg_ParseTuple(args, "s|O:find_module", &name, &path))
+ if (!PyArg_ParseTuple(args, "es|O:find_module",
+ Py_FileSystemDefaultEncoding, &name,
+ &path))
return NULL;
return call_find_module(name, path);
}