summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin v. Löwis <martin@v.loewis.de>2003-05-10 07:10:12 (GMT)
committerMartin v. Löwis <martin@v.loewis.de>2003-05-10 07:10:12 (GMT)
commit5467d4c0e31e9db305a4899a44d7978f83e96649 (patch)
treecf52a41492d6c1271a4f32ace0a62237daceb63a
parentb7b4ce27f74901258f0b3af1fb9483d8f38feab8 (diff)
downloadcpython-5467d4c0e31e9db305a4899a44d7978f83e96649.zip
cpython-5467d4c0e31e9db305a4899a44d7978f83e96649.tar.gz
cpython-5467d4c0e31e9db305a4899a44d7978f83e96649.tar.bz2
Patch #612627: Add encoding attribute to file objects, and determine
the terminal encoding on Windows and Unix.
-rw-r--r--Doc/api/concrete.tex6
-rw-r--r--Doc/lib/libstdtypes.tex14
-rw-r--r--Include/fileobject.h2
-rw-r--r--Misc/NEWS3
-rw-r--r--Objects/fileobject.c42
-rw-r--r--Python/sysmodule.c43
6 files changed, 109 insertions, 1 deletions
diff --git a/Doc/api/concrete.tex b/Doc/api/concrete.tex
index f8cbc28..2c14596 100644
--- a/Doc/api/concrete.tex
+++ b/Doc/api/concrete.tex
@@ -2029,6 +2029,12 @@ implementation detail and may change in future releases of Python.
creation.
\end{cfuncdesc}
+\begin{cfuncdesc}{int}{PyFile_SetEncoding}{PyObject *p, const char *enc}
+ Set the file's encoding for Unicode output to \var{enc}. Return
+ 1 on success and 0 on failure.
+ \versionadded{2.3}
+\end{cfuncdesc}
+
\begin{cfuncdesc}{int}{PyFile_SoftSpace}{PyObject *p, int newflag}
This function exists for internal use by the interpreter. Sets the
\member{softspace} attribute of \var{p} to \var{newflag} and
diff --git a/Doc/lib/libstdtypes.tex b/Doc/lib/libstdtypes.tex
index 04cdbca..951d088 100644
--- a/Doc/lib/libstdtypes.tex
+++ b/Doc/lib/libstdtypes.tex
@@ -1372,6 +1372,20 @@ read-only attribute; the \method{close()} method changes the value.
It may not be available on all file-like objects.
\end{memberdesc}
+\begin{memberdesc}[file]{encoding}
+The encoding that this file uses. When Unicode strings are written
+to a file, they will be converted to byte strings using this encoding.
+In addition, when the file is connected to a terminal, the attribute
+gives the encoding that the terminal is likely to use (that
+information might be incorrect if the user has misconfigured the
+terminal). The attribute is read-only and may not be present on
+all file-like objects. It may also be \code{None}, in which case
+the file uses the system default encoding for converting Unicode
+strings.
+
+\versionadded{2.3}
+\end{memberdesc}
+
\begin{memberdesc}[file]{mode}
The I/O mode for the file. If the file was created using the
\function{open()} built-in function, this will be the value of the
diff --git a/Include/fileobject.h b/Include/fileobject.h
index c351b4d..e2053df 100644
--- a/Include/fileobject.h
+++ b/Include/fileobject.h
@@ -24,6 +24,7 @@ typedef struct {
int f_newlinetypes; /* Types of newlines seen */
int f_skipnextlf; /* Skip next \n */
#endif
+ PyObject *f_encoding;
} PyFileObject;
PyAPI_DATA(PyTypeObject) PyFile_Type;
@@ -33,6 +34,7 @@ PyAPI_DATA(PyTypeObject) PyFile_Type;
PyAPI_FUNC(PyObject *) PyFile_FromString(char *, char *);
PyAPI_FUNC(void) PyFile_SetBufSize(PyObject *, int);
+PyAPI_FUNC(int) PyFile_SetEncoding(PyObject *, const char *);
PyAPI_FUNC(PyObject *) PyFile_FromFile(FILE *, char *, char *,
int (*)(FILE *));
PyAPI_FUNC(FILE *) PyFile_AsFile(PyObject *);
diff --git a/Misc/NEWS b/Misc/NEWS
index b943391..098145a 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -12,6 +12,9 @@ What's New in Python 2.3 beta 2?
Core and builtins
-----------------
+- The encoding attribute has been added for file objects, and set to
+ the terminal encoding on Unix and Windows.
+
- The softspace attribute of file objects became read-only by oversight.
It's writable again.
diff --git a/Objects/fileobject.c b/Objects/fileobject.c
index 92cfa5b..40ce759 100644
--- a/Objects/fileobject.c
+++ b/Objects/fileobject.c
@@ -116,6 +116,7 @@ fill_file_fields(PyFileObject *f, FILE *fp, char *name, char *mode,
Py_DECREF(f->f_name);
Py_DECREF(f->f_mode);
+ Py_DECREF(f->f_encoding);
#ifdef Py_USING_UNICODE
if (wname)
f->f_name = PyUnicode_FromObject(wname);
@@ -133,7 +134,9 @@ fill_file_fields(PyFileObject *f, FILE *fp, char *name, char *mode,
f->f_newlinetypes = NEWLINE_UNKNOWN;
f->f_skipnextlf = 0;
#endif
-
+ Py_INCREF(Py_None);
+ f->f_encoding = Py_None;
+
if (f->f_name == NULL || f->f_mode == NULL)
return NULL;
f->f_fp = fp;
@@ -302,6 +305,21 @@ PyFile_SetBufSize(PyObject *f, int bufsize)
}
}
+/* Set the encoding used to output Unicode strings.
+ Return 1 on success, 0 on failure. */
+
+int
+PyFile_SetEncoding(PyObject *f, const char *enc)
+{ /* Replace f's f_encoding with a new string object made from enc. */
+ PyFileObject *file = (PyFileObject*)f; /* unchecked cast: no PyFile_Check is done here */
+ PyObject *str = PyString_FromString(enc); /* string object built from the C string enc */
+ if (!str)
+ return 0; /* string creation failed; f_encoding is left untouched */
+ Py_DECREF(file->f_encoding); /* drop the reference to the previous encoding object */
+ file->f_encoding = str; /* store the new string as the file's encoding */
+ return 1; /* success */
+}
+
static PyObject *
err_closed(void)
{
@@ -323,6 +341,7 @@ file_dealloc(PyFileObject *f)
}
Py_XDECREF(f->f_name);
Py_XDECREF(f->f_mode);
+ Py_XDECREF(f->f_encoding);
drop_readahead(f);
f->ob_type->tp_free((PyObject *)f);
}
@@ -1667,6 +1686,8 @@ static PyMemberDef file_memberlist[] = {
"file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
{"name", T_OBJECT, OFF(f_name), RO,
"file name"},
+ {"encoding", T_OBJECT, OFF(f_encoding), RO,
+ "file encoding"},
/* getattr(f, "closed") is implemented without this table */
{NULL} /* Sentinel */
};
@@ -1851,6 +1872,8 @@ file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
((PyFileObject *)self)->f_name = not_yet_string;
Py_INCREF(not_yet_string);
((PyFileObject *)self)->f_mode = not_yet_string;
+ Py_INCREF(Py_None);
+ ((PyFileObject *)self)->f_encoding = Py_None;
}
return self;
}
@@ -2034,11 +2057,28 @@ PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
}
else if (PyFile_Check(f)) {
FILE *fp = PyFile_AsFile(f);
+ PyObject *enc = ((PyFileObject*)f)->f_encoding;
+ int result;
if (fp == NULL) {
err_closed();
return -1;
}
+#ifdef Py_USING_UNICODE
+ if (PyUnicode_Check(v) && enc != Py_None) {
+ char *cenc = PyString_AS_STRING(enc);
+ value = PyUnicode_AsEncodedString(v, cenc, "strict");
+ if (value == NULL)
+ return -1;
+ } else {
+ value = v;
+ Py_INCREF(value);
+ }
+ result = PyObject_Print(value, fp, flags);
+ Py_DECREF(value);
+ return result;
+#else
return PyObject_Print(v, fp, flags);
+#endif
}
writer = PyObject_GetAttrString(f, "write");
if (writer == NULL)
diff --git a/Python/sysmodule.c b/Python/sysmodule.c
index d06d18a..edbc2bf 100644
--- a/Python/sysmodule.c
+++ b/Python/sysmodule.c
@@ -36,6 +36,15 @@ extern const char *PyWin_DLLVersionString;
#include <unixlib.h>
#endif
+#ifdef MS_WINDOWS
+#include <windows.h>
+#endif
+
+#ifdef HAVE_LANGINFO_H
+#include <locale.h>
+#include <langinfo.h>
+#endif
+
PyObject *
PySys_GetObject(char *name)
{
@@ -881,6 +890,12 @@ _PySys_Init(void)
PyObject *m, *v, *sysdict;
PyObject *sysin, *sysout, *syserr;
char *s;
+#ifdef MS_WINDOWS
+ char buf[10];
+#endif
+#if defined(HAVE_LANGINFO_H) && defined(CODESET)
+ char *oldloc, *codeset;
+#endif
m = Py_InitModule3("sys", sys_methods, sys_doc);
sysdict = PyModule_GetDict(m);
@@ -890,6 +905,34 @@ _PySys_Init(void)
syserr = PyFile_FromFile(stderr, "<stderr>", "w", NULL);
if (PyErr_Occurred())
return NULL;
+#ifdef MS_WINDOWS
+ if(isatty(_fileno(stdin))){
+ sprintf(buf, "cp%d", GetConsoleCP());
+ if (!PyFile_SetEncoding(sysin, buf))
+ return NULL;
+ }
+ if(isatty(_fileno(stdout))) {
+ sprintf(buf, "cp%d", GetConsoleOutputCP());
+ if (!PyFile_SetEncoding(sysout, buf))
+ return NULL;
+ }
+#endif
+
+#if defined(HAVE_LANGINFO_H) && defined(CODESET)
+ oldloc = setlocale(LC_CTYPE, 0);
+ setlocale(LC_CTYPE, "");
+ codeset = nl_langinfo(CODESET);
+ setlocale(LC_CTYPE, oldloc);
+ if(codeset && isatty(fileno(stdin))){
+ if (!PyFile_SetEncoding(sysin, codeset))
+ return NULL;
+ }
+ if(codeset && isatty(fileno(stdout))) {
+ if (!PyFile_SetEncoding(sysout, codeset))
+ return NULL;
+ }
+#endif
+
PyDict_SetItemString(sysdict, "stdin", sysin);
PyDict_SetItemString(sysdict, "stdout", sysout);
PyDict_SetItemString(sysdict, "stderr", syserr);