summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Victor Stinner <vstinner@python.org> 2023-07-04 07:29:52 (GMT)
committer GitHub <noreply@github.com> 2023-07-04 07:29:52 (GMT)
commit d8c5d76da2d5c3e8f9c05fcfc59dc1aaaa1fe6e1 (patch)
tree 8d76adb30c62539cb44b065e2286d7809dab8021
parent 3406f8cce542ea4edf4153c0fac5216df283a9b1 (diff)
download cpython-d8c5d76da2d5c3e8f9c05fcfc59dc1aaaa1fe6e1.zip
cpython-d8c5d76da2d5c3e8f9c05fcfc59dc1aaaa1fe6e1.tar.gz
cpython-d8c5d76da2d5c3e8f9c05fcfc59dc1aaaa1fe6e1.tar.bz2
gh-106320: Remove private _PyUnicode codecs C API functions (#106385)
Remove private _PyUnicode codecs C API functions: move them to the internal C API (pycore_unicodeobject.h). No longer export most of these functions.
-rw-r--r-- Include/cpython/unicodeobject.h 106
-rw-r--r-- Include/internal/pycore_unicodeobject.h 100
-rw-r--r-- Parser/string_parser.c 1
3 files changed, 101 insertions, 106 deletions
diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
index fcd9c28..dc8f643 100644
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -461,112 +461,6 @@ PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);
#define _PyUnicode_AsString PyUnicode_AsUTF8
-/* --- UTF-7 Codecs ------------------------------------------------------- */
-
-PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
- PyObject *unicode, /* Unicode object */
- int base64SetO, /* Encode RFC2152 Set O characters in base64 */
- int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
- const char *errors /* error handling */
- );
-
-/* --- UTF-8 Codecs ------------------------------------------------------- */
-
-PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
- PyObject *unicode,
- const char *errors);
-
-/* --- UTF-32 Codecs ------------------------------------------------------ */
-
-PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
- PyObject *object, /* Unicode object */
- const char *errors, /* error handling */
- int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
- );
-
-/* --- UTF-16 Codecs ------------------------------------------------------ */
-
-/* Returns a Python string object holding the UTF-16 encoded value of
- the Unicode data.
-
- If byteorder is not 0, output is written according to the following
- byte order:
-
- byteorder == -1: little endian
- byteorder == 0: native byte order (writes a BOM mark)
- byteorder == 1: big endian
-
- If byteorder is 0, the output string will always start with the
- Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
- prepended.
-*/
-PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
- PyObject* unicode, /* Unicode object */
- const char *errors, /* error handling */
- int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
- );
-
-/* --- Unicode-Escape Codecs ---------------------------------------------- */
-
-/* Variant of PyUnicode_DecodeUnicodeEscape that supports partial decoding. */
-PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful(
- const char *string, /* Unicode-Escape encoded string */
- Py_ssize_t length, /* size of string */
- const char *errors, /* error handling */
- Py_ssize_t *consumed /* bytes consumed */
-);
-/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
- chars. */
-PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
- const char *string, /* Unicode-Escape encoded string */
- Py_ssize_t length, /* size of string */
- const char *errors, /* error handling */
- Py_ssize_t *consumed, /* bytes consumed */
- const char **first_invalid_escape /* on return, points to first
- invalid escaped char in
- string. */
-);
-
-/* --- Raw-Unicode-Escape Codecs ---------------------------------------------- */
-
-/* Variant of PyUnicode_DecodeRawUnicodeEscape that supports partial decoding. */
-PyAPI_FUNC(PyObject*) _PyUnicode_DecodeRawUnicodeEscapeStateful(
- const char *string, /* Unicode-Escape encoded string */
- Py_ssize_t length, /* size of string */
- const char *errors, /* error handling */
- Py_ssize_t *consumed /* bytes consumed */
-);
-
-/* --- Latin-1 Codecs ----------------------------------------------------- */
-
-PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
- PyObject* unicode,
- const char* errors);
-
-/* --- ASCII Codecs ------------------------------------------------------- */
-
-PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
- PyObject* unicode,
- const char* errors);
-
-/* --- Character Map Codecs ----------------------------------------------- */
-
-/* Translate an Unicode object by applying a character mapping table to
- it and return the resulting Unicode object.
-
- The mapping table must map Unicode ordinal integers to Unicode strings,
- Unicode ordinal integers or None (causing deletion of the character).
-
- Mapping tables may be dictionaries or sequences. Unmapped character
- ordinals (ones which cause a LookupError) are left untouched and
- are copied as-is.
-*/
-PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
- PyObject *unicode, /* Unicode object */
- PyObject *mapping, /* encoding mapping */
- const char *errors /* error handling */
- );
-
/* --- Decimal Encoder ---------------------------------------------------- */
/* Coverts a Unicode object holding a decimal value to an ASCII string
diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h
index da01f57..dd20ac1 100644
--- a/Include/internal/pycore_unicodeobject.h
+++ b/Include/internal/pycore_unicodeobject.h
@@ -177,6 +177,106 @@ PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
Py_ssize_t start,
Py_ssize_t end);
+/* --- UTF-7 Codecs ------------------------------------------------------- */
+
+extern PyObject* _PyUnicode_EncodeUTF7(
+ PyObject *unicode, /* Unicode object */
+ int base64SetO, /* Encode RFC2152 Set O characters in base64 */
+ int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
+ const char *errors); /* error handling */
+
+/* --- UTF-8 Codecs ------------------------------------------------------- */
+
+PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
+ PyObject *unicode,
+ const char *errors);
+
+/* --- UTF-32 Codecs ------------------------------------------------------ */
+
+PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
+ PyObject *object, /* Unicode object */
+ const char *errors, /* error handling */
+ int byteorder); /* byteorder to use 0=BOM+native;-1=LE,1=BE */
+
+/* --- UTF-16 Codecs ------------------------------------------------------ */
+
+/* Returns a Python string object holding the UTF-16 encoded value of
+ the Unicode data.
+
+ If byteorder is not 0, output is written according to the following
+ byte order:
+
+ byteorder == -1: little endian
+ byteorder == 0: native byte order (writes a BOM mark)
+ byteorder == 1: big endian
+
+ If byteorder is 0, the output string will always start with the
+ Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
+ prepended.
+*/
+PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
+ PyObject* unicode, /* Unicode object */
+ const char *errors, /* error handling */
+ int byteorder); /* byteorder to use 0=BOM+native;-1=LE,1=BE */
+
+/* --- Unicode-Escape Codecs ---------------------------------------------- */
+
+/* Variant of PyUnicode_DecodeUnicodeEscape that supports partial decoding. */
+extern PyObject* _PyUnicode_DecodeUnicodeEscapeStateful(
+ const char *string, /* Unicode-Escape encoded string */
+ Py_ssize_t length, /* size of string */
+ const char *errors, /* error handling */
+ Py_ssize_t *consumed); /* bytes consumed */
+
+/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
+ chars. */
+PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
+ const char *string, /* Unicode-Escape encoded string */
+ Py_ssize_t length, /* size of string */
+ const char *errors, /* error handling */
+ Py_ssize_t *consumed, /* bytes consumed */
+ const char **first_invalid_escape); /* on return, points to first
+ invalid escaped char in
+ string. */
+
+/* --- Raw-Unicode-Escape Codecs ---------------------------------------------- */
+
+/* Variant of PyUnicode_DecodeRawUnicodeEscape that supports partial decoding. */
+extern PyObject* _PyUnicode_DecodeRawUnicodeEscapeStateful(
+ const char *string, /* Unicode-Escape encoded string */
+ Py_ssize_t length, /* size of string */
+ const char *errors, /* error handling */
+ Py_ssize_t *consumed); /* bytes consumed */
+
+/* --- Latin-1 Codecs ----------------------------------------------------- */
+
+extern PyObject* _PyUnicode_AsLatin1String(
+ PyObject* unicode,
+ const char* errors);
+
+/* --- ASCII Codecs ------------------------------------------------------- */
+
+extern PyObject* _PyUnicode_AsASCIIString(
+ PyObject* unicode,
+ const char* errors);
+
+/* --- Character Map Codecs ----------------------------------------------- */
+
+/* Translate an Unicode object by applying a character mapping table to
+ it and return the resulting Unicode object.
+
+ The mapping table must map Unicode ordinal integers to Unicode strings,
+ Unicode ordinal integers or None (causing deletion of the character).
+
+ Mapping tables may be dictionaries or sequences. Unmapped character
+ ordinals (ones which cause a LookupError) are left untouched and
+ are copied as-is.
+*/
+extern PyObject* _PyUnicode_EncodeCharmap(
+ PyObject *unicode, /* Unicode object */
+ PyObject *mapping, /* encoding mapping */
+ const char *errors); /* error handling */
+
/* --- Methods & Slots ---------------------------------------------------- */
extern PyObject* _PyUnicode_JoinArray(
diff --git a/Parser/string_parser.c b/Parser/string_parser.c
index 20459e8..bc1f99d 100644
--- a/Parser/string_parser.c
+++ b/Parser/string_parser.c
@@ -1,6 +1,7 @@
#include <stdbool.h>
#include <Python.h>
+#include "pycore_unicodeobject.h" // _PyUnicode_DecodeUnicodeEscapeInternal()
#include "tokenizer.h"
#include "pegen.h"