summaryrefslogtreecommitdiffstats
path: root/Modules/binascii.c
diff options
context:
space:
mode:
authorMartin v. Löwis <martin@v.loewis.de>2001-09-30 20:32:11 (GMT)
committerMartin v. Löwis <martin@v.loewis.de>2001-09-30 20:32:11 (GMT)
commit16dc7f44b1116aab58897bc7e94cb972488206fc (patch)
treeb1e90d9ca27e6dbdd0cd6b6d66fcb8a333a746a3 /Modules/binascii.c
parent5f12d755a82312673c35e8224b2bde7ced159c52 (diff)
downloadcpython-16dc7f44b1116aab58897bc7e94cb972488206fc.zip
cpython-16dc7f44b1116aab58897bc7e94cb972488206fc.tar.gz
cpython-16dc7f44b1116aab58897bc7e94cb972488206fc.tar.bz2
Patch #462190, patch #464070: Support quoted printable in the binascii module.
Decode and encode underscores for header style encoding. Fixes bug #463996.
Diffstat (limited to 'Modules/binascii.c')
-rw-r--r--Modules/binascii.c296
1 files changed, 296 insertions, 0 deletions
diff --git a/Modules/binascii.c b/Modules/binascii.c
index 00a2805..484f656 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -42,6 +42,15 @@
** does make the performance sub-optimal. Oh well, too bad...
**
** Jack Jansen, CWI, July 1995.
+**
+** Added support for quoted-printable encoding, based on RFC 1521 et al.
+** Quoted-printable encoding specifies that non-printable characters (anything
+** below 32 and above 126) be encoded as =XX, where XX is the hexadecimal value
+** of the character. It also specifies some other behavior to enable 8-bit data
+** in a mail message with little difficulty (maximum line sizes, protecting
+** some cases of whitespace, etc.).
+**
+** Brandon Long, September 2001.
*/
@@ -971,6 +980,289 @@ static char doc_unhexlify[] =
hexstr must contain an even number of hex digits (upper or lower case).\n\
This function is also available as \"unhexlify()\"";
+/* Maps an ASCII code (0..127) to the value of the hexadecimal digit it
+ * represents, or -1 when the character is not a hexadecimal digit.  Both
+ * upper case ('A'..'F') and lower case ('a'..'f') digits are accepted. */
+static const int table_hex[128] = {
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+     0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
+    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
+    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
+};
+
+/* Value of hexadecimal digit c, or -1 if c is not one.  Anything outside
+ * 0..127 (bytes with the high bit set, negative chars) yields -1 instead of
+ * indexing past the end of table_hex.  The argument is evaluated twice, so
+ * it must not have side effects. */
+#define hexval(c) \
+    (((unsigned int)(c)) < 128u ? table_hex[(unsigned int)(c)] : -1)
+
+/* Maximum length of an encoded quoted-printable line. */
+#define MAXLINESIZE 76
+
+static char doc_a2b_qp[] = "Decode a string of qp-encoded data";
+
+/* True when c is an ASCII hexadecimal digit.  The range test comes first
+ * so that hexval() is never handed a byte above 127. */
+static int
+qp_is_hexdigit(unsigned char c)
+{
+    return c < 128 && hexval(c) >= 0;
+}
+
+/* a2b_qp(data, header=0): decode quoted-printable data.
+ *
+ * Decoding rules, as implemented below:
+ *   - "=" followed by CR, LF, space or tab is a soft line break: everything
+ *     up to and including the next LF is dropped.
+ *   - "==" decodes to a single "=" (tolerates a broken encoder).
+ *   - "=XX", with XX two hexadecimal digits, decodes to the byte 0xXX.
+ *   - any other "=" is copied through unchanged; a lone "=" at the very
+ *     end of the input is dropped.
+ *   - when header is true, "_" decodes to a space.
+ *   - every other byte is copied through unchanged.
+ *
+ * Returns a new string object, or NULL with an exception set.
+ */
+static PyObject*
+binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    int in, out;
+    unsigned char *data, *odata;
+    int datalen = 0;    /* "s#" stores the length through an int pointer */
+    PyObject *rv;
+    static char *kwlist[] = {"data", "header", NULL};
+    int header = 0;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data,
+                                     &datalen, &header))
+        return NULL;
+
+    /* The decoded data is never longer than the input, so an output buffer
+     * of the same size is always enough.  Ask for at least one byte:
+     * calloc(1, 0) is allowed to return NULL, which must not be reported
+     * as an out-of-memory condition for empty input. */
+    odata = (unsigned char *) calloc(1, datalen > 0 ? (size_t)datalen : 1);
+    if (odata == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    in = out = 0;
+    while (in < datalen) {
+        if (data[in] == '=') {
+            in++;
+            if (in >= datalen)
+                break;
+            if ((data[in] == '\n') || (data[in] == '\r') ||
+                (data[in] == ' ') || (data[in] == '\t')) {
+                /* Soft line break: skip through the terminating LF */
+                while (in < datalen && data[in] != '\n')
+                    in++;
+                if (in < datalen)
+                    in++;
+            }
+            else if (data[in] == '=') {
+                /* broken case from broken python qp */
+                odata[out++] = '=';
+                in++;
+            }
+            else if ((in + 1 < datalen) &&
+                     qp_is_hexdigit(data[in]) &&
+                     qp_is_hexdigit(data[in+1])) {
+                /* =XX escape; the bounds check above keeps the second
+                 * digit inside the input buffer */
+                odata[out++] = (unsigned char)
+                    ((hexval(data[in]) << 4) | hexval(data[in+1]));
+                in += 2;
+            }
+            else {
+                /* Not a valid escape: keep the "=" and let the next
+                 * iteration handle the byte that followed it */
+                odata[out++] = '=';
+            }
+        }
+        else if (header && data[in] == '_') {
+            odata[out++] = ' ';
+            in++;
+        }
+        else {
+            odata[out++] = data[in++];
+        }
+    }
+
+    /* rv is NULL on failure; the scratch buffer is released either way */
+    rv = PyString_FromStringAndSize((char *)odata, out);
+    free(odata);
+    return rv;
+}
+
+/* Store the two upper-case hexadecimal digits of ch in s[0] (high nibble)
+ * and s[1] (low nibble).  No terminator is written.  Always returns 0. */
+static int
+to_hex (unsigned char ch, unsigned char *s)
+{
+    static const char digits[] = "0123456789ABCDEF";
+
+    s[0] = digits[(ch >> 4) & 0x0F];
+    s[1] = digits[ch & 0x0F];
+    return 0;
+}
+
+/* Docstring for binascii.b2a_qp(). */
+static char doc_b2a_qp[] =
+    "b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n"
+    " Encode a string using quoted-printable encoding. \n"
+    "\n"
+    "On encoding, when istext is set, newlines are not encoded, and white \n"
+    "space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n"
+    "both encoded. When quotetabs is set, space and tabs are encoded.";
+
+/* Decide whether the byte at data[in] has to be written as an =XX escape.
+ * Both passes of binascii_b2a_qp() call this, so the size computed by the
+ * first pass always matches what the second pass writes.
+ *
+ * A byte is escaped when it is:
+ *   - above 126, or "=";
+ *   - "_" in header mode (where "_" stands for a space);
+ *   - "." while linelen == 1;
+ *     NOTE(review): presumably meant to protect a "." that starts a line,
+ *     which would be linelen == 0 -- kept as found, confirm against quopri;
+ *   - CR or LF when the data is not text;
+ *   - a space or tab when quotetabs is set, or when it is the last byte of
+ *     the input;
+ *   - any other byte below 33, i.e. a control character.
+ */
+static int
+qp_must_quote(const unsigned char *data, int datalen, int in,
+              int linelen, int quotetabs, int istext, int header)
+{
+    unsigned char c = data[in];
+
+    if (c > 126 || c == '=')
+        return 1;
+    if (header && c == '_')
+        return 1;
+    if (c == '.' && linelen == 1)
+        return 1;
+    if (c == '\r' || c == '\n')
+        return !istext;
+    if (c == ' ' || c == '\t')
+        return quotetabs || (in + 1 == datalen);
+    /* Control characters are escaped whether or not quotetabs is set */
+    return c < 33;
+}
+
+/* XXX: This is ridiculously complicated to be backward compatible
+ * (mostly) with the quopri module. It doesn't re-create the quopri
+ * module bug where text ending in CRLF has the CR encoded */
+
+/* b2a_qp(data, quotetabs=0, istext=1, header=0): quoted-printable encode.
+ *
+ * Works in two passes over the input: the first computes the exact size of
+ * the output, the second writes it.  Lines are broken with a soft line
+ * break ("=" followed by a newline) before they reach MAXLINESIZE.  In
+ * header mode a literal space is written as "_".
+ *
+ * Returns a new string object, or NULL with an exception set.
+ */
+static PyObject*
+binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs)
+{
+    int in, out;
+    unsigned char *data, *odata;
+    int datalen = 0;    /* "s#" stores the length through an int pointer */
+    int odatalen = 0;
+    PyObject *rv;
+    int linelen = 0;
+    static char *kwlist[] = {"data", "quotetabs", "istext", "header", NULL};
+    int istext = 1;
+    int quotetabs = 0;
+    int header = 0;
+    unsigned char ch;
+    int crlf = 0;
+    unsigned char *p;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data,
+                                     &datalen, &quotetabs, &istext, &header))
+        return NULL;
+
+    /* See if this string is using CRLF line ends */
+    /* XXX: this function has the side effect of converting all of
+     * the end of lines to be the same depending on this detection
+     * here */
+    /* memchr, not strchr: the data is length-counted and may contain NUL
+     * bytes, which must not end the search early. */
+    p = (unsigned char *) memchr(data, '\n', (size_t)datalen);
+    if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
+        crlf = 1;
+
+    /* First, scan to see how many characters need to be encoded */
+    in = 0;
+    while (in < datalen) {
+        if (qp_must_quote(data, datalen, in, linelen,
+                          quotetabs, istext, header)) {
+            if ((linelen + 3) >= MAXLINESIZE) {
+                /* soft line break: "=" plus the newline sequence */
+                linelen = 0;
+                odatalen += crlf ? 3 : 2;
+            }
+            linelen += 3;
+            odatalen += 3;
+            in++;
+        }
+        else if (istext &&
+                 ((data[in] == '\n') ||
+                  ((in+1 < datalen) && (data[in] == '\r') &&
+                   (data[in+1] == '\n')))) {
+            linelen = 0;
+            /* Protect against whitespace on end of line: room for turning
+             * the preceding space/tab into a three-byte escape.  This can
+             * over-estimate (e.g. header mode already wrote "_"), never
+             * under-estimate. */
+            if (in && ((data[in-1] == ' ') || (data[in-1] == '\t')))
+                odatalen += 2;
+            odatalen += crlf ? 2 : 1;
+            in += (data[in] == '\r') ? 2 : 1;
+        }
+        else {
+            if ((in + 1 != datalen) &&
+                (data[in+1] != '\n') &&
+                (linelen + 1) >= MAXLINESIZE) {
+                linelen = 0;
+                odatalen += crlf ? 3 : 2;
+            }
+            linelen++;
+            odatalen++;
+            in++;
+        }
+    }
+
+    /* Ask for at least one byte: calloc(1, 0) is allowed to return NULL,
+     * which must not be reported as out-of-memory for empty input. */
+    odata = (unsigned char *) calloc(1, odatalen > 0 ? (size_t)odatalen : 1);
+    if (odata == NULL) {
+        PyErr_NoMemory();
+        return NULL;
+    }
+
+    /* Second pass: same walk as above, this time writing the output */
+    in = out = linelen = 0;
+    while (in < datalen) {
+        if (qp_must_quote(data, datalen, in, linelen,
+                          quotetabs, istext, header)) {
+            if ((linelen + 3) >= MAXLINESIZE) {
+                odata[out++] = '=';
+                if (crlf) odata[out++] = '\r';
+                odata[out++] = '\n';
+                linelen = 0;
+            }
+            odata[out++] = '=';
+            to_hex(data[in], &odata[out]);
+            out += 2;
+            in++;
+            linelen += 3;
+        }
+        else if (istext &&
+                 ((data[in] == '\n') ||
+                  ((in+1 < datalen) && (data[in] == '\r') &&
+                   (data[in+1] == '\n')))) {
+            linelen = 0;
+            /* Protect against whitespace on end of line: rewrite the
+             * space/tab just written as an =XX escape */
+            if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
+                ch = odata[out-1];
+                odata[out-1] = '=';
+                to_hex(ch, &odata[out]);
+                out += 2;
+            }
+
+            if (crlf) odata[out++] = '\r';
+            odata[out++] = '\n';
+            in += (data[in] == '\r') ? 2 : 1;
+        }
+        else {
+            if ((in + 1 != datalen) &&
+                (data[in+1] != '\n') &&
+                (linelen + 1) >= MAXLINESIZE) {
+                odata[out++] = '=';
+                if (crlf) odata[out++] = '\r';
+                odata[out++] = '\n';
+                linelen = 0;
+            }
+            linelen++;
+            if (header && data[in] == ' ') {
+                odata[out++] = '_';
+                in++;
+            }
+            else {
+                odata[out++] = data[in++];
+            }
+        }
+    }
+
+    /* rv is NULL on failure; the scratch buffer is released either way */
+    rv = PyString_FromStringAndSize((char *)odata, out);
+    free(odata);
+    return rv;
+}
/* List of functions defined in the module */
@@ -990,6 +1282,10 @@ static struct PyMethodDef binascii_module_methods[] = {
doc_rledecode_hqx},
{"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
{"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
+ {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS,
+ doc_a2b_qp},
+ {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS,
+ doc_b2a_qp},
{NULL, NULL} /* sentinel */
};