diff options
author | Martin v. Löwis <martin@v.loewis.de> | 2001-09-30 20:32:11 (GMT) |
---|---|---|
committer | Martin v. Löwis <martin@v.loewis.de> | 2001-09-30 20:32:11 (GMT) |
commit | 16dc7f44b1116aab58897bc7e94cb972488206fc (patch) | |
tree | b1e90d9ca27e6dbdd0cd6b6d66fcb8a333a746a3 /Modules/binascii.c | |
parent | 5f12d755a82312673c35e8224b2bde7ced159c52 (diff) | |
download | cpython-16dc7f44b1116aab58897bc7e94cb972488206fc.zip cpython-16dc7f44b1116aab58897bc7e94cb972488206fc.tar.gz cpython-16dc7f44b1116aab58897bc7e94cb972488206fc.tar.bz2 |
Patch #462190, patch #464070: Support quoted printable in the binascii module.
Decode and encode underscores for header style encoding. Fixes bug #463996.
Diffstat (limited to 'Modules/binascii.c')
-rw-r--r-- | Modules/binascii.c | 296 |
1 files changed, 296 insertions, 0 deletions
diff --git a/Modules/binascii.c b/Modules/binascii.c index 00a2805..484f656 100644 --- a/Modules/binascii.c +++ b/Modules/binascii.c @@ -42,6 +42,15 @@ ** does make the performance sub-optimal. Oh well, too bad... ** ** Jack Jansen, CWI, July 1995. +** +** Added support for quoted-printable encoding, based on rfc 1521 et al +** quoted-printable encoding specifies that non printable characters (anything +** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value +** of the character. It also specifies some other behavior to enable 8bit data +** in a mail message with little difficulty (maximum line sizes, protecting +** some cases of whitespace, etc). +** +** Brandon Long, September 2001. */ @@ -971,6 +980,289 @@ static char doc_unhexlify[] = hexstr must contain an even number of hex digits (upper or lower case).\n\ This function is also available as \"unhexlify()\""; +static int table_hex[128] = { + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1, -1,-1,-1,-1, + -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, + -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1 +}; + +#define hexval(c) table_hex[(unsigned int)(c)] + +#define MAXLINESIZE 76 + +static char doc_a2b_qp[] = "Decode a string of qp-encoded data"; + +static PyObject* +binascii_a2b_qp(PyObject *self, PyObject *args, PyObject *kwargs) +{ + unsigned int in, out; + char ch; + unsigned char *data, *odata; + unsigned int datalen = 0; + PyObject *rv; + static char *kwlist[] = {"data", "header", NULL}; + int header = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i", kwlist, &data, + &datalen, &header)) + return NULL; + + /* We allocate the output same size as input, this is overkill */ + odata = (char *) calloc(1, datalen); + + if (odata == NULL) { + PyErr_NoMemory(); + return NULL; + } + + in = out = 0; + while (in < datalen) { + if (data[in] == '=') { + in++; + if (in >= datalen) break; + /* Soft line breaks */ + if ((data[in] == '\n') || (data[in] == '\r') || + (data[in] == ' ') || (data[in] == '\t')) { + if (data[in] != '\n') { + while (in < datalen && data[in] != '\n') in++; + } + if (in < datalen) in++; + } + else if (data[in] == '=') { + /* broken case from broken python qp */ + odata[out++] = '='; + in++; + } + else if (((data[in] >= 'A' && data[in] <= 'F') || + (data[in] >= 'a' && data[in] <= 'f') || + (data[in] >= '0' && data[in] <= '9')) && + ((data[in+1] >= 'A' && data[in+1] <= 'F') || + (data[in+1] >= 'a' && data[in+1] <= 'f') || + (data[in+1] >= '0' && data[in+1] <= '9'))) { + /* hexval */ + ch = hexval(data[in]) << 4; + in++; + ch |= hexval(data[in]); + in++; + odata[out++] = ch; + } + else { + odata[out++] = '='; + } + } + else if (header && data[in] == '_') { + odata[out++] = ' '; + in++; + } + else { + odata[out] = data[in]; + in++; + out++; + } + } + if ((rv = PyString_FromStringAndSize(odata, out)) == NULL) { + free (odata); + return NULL; + } + free (odata); + return rv; +} + +static int +to_hex (unsigned char ch, unsigned char *s) +{ + unsigned int uvalue = ch; + + s[1] = "0123456789ABCDEF"[uvalue % 16]; + uvalue = (uvalue / 16); + s[0] = "0123456789ABCDEF"[uvalue % 16]; + return 0; +} + +static char doc_b2a_qp[] = +"b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\ + Encode a string using quoted-printable encoding. \n\ +\n\ +On encoding, when istext is set, newlines are not encoded, and white \n\ +space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\ +both encoded. When quotetabs is set, space and tabs are encoded."; + +/* XXX: This is ridiculously complicated to be backward compatible + * (mostly) with the quopri module. It doesn't re-create the quopri + * module bug where text ending in CRLF has the CR encoded */ +static PyObject* +binascii_b2a_qp (PyObject *self, PyObject *args, PyObject *kwargs) +{ + unsigned int in, out; + unsigned char *data, *odata; + unsigned int datalen = 0, odatalen = 0; + PyObject *rv; + unsigned int linelen = 0; + static char *kwlist[] = {"data", "quotetabs", "istext", "header", NULL}; + int istext = 1; + int quotetabs = 0; + int header = 0; + unsigned char ch; + int crlf = 0; + unsigned char *p; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|iii", kwlist, &data, + &datalen, "etabs, &istext, &header)) + return NULL; + + /* See if this string is using CRLF line ends */ + /* XXX: this function has the side effect of converting all of + * the end of lines to be the same depending on this detection + * here */ + p = strchr(data, '\n'); + if ((p != NULL) && (p > data) && (*(p-1) == '\r')) + crlf = 1; + + /* First, scan to see how many characters need to be encoded */ + in = 0; + while (in < datalen) { + if ((data[in] > 126) || + (data[in] == '=') || + (header && data[in] == '_') || + ((data[in] == '.') && (linelen == 1)) || + (!istext && ((data[in] == '\r') || (data[in] == '\n'))) || + ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) || + ((data[in] < 33) && + (data[in] != '\r') && (data[in] != '\n') && + (quotetabs && ((data[in] != '\t') || (data[in] != ' '))))) + { + if ((linelen + 3) >= MAXLINESIZE) { + linelen = 0; + if (crlf) + odatalen += 3; + else + odatalen += 2; + } + linelen += 3; + odatalen += 3; + in++; + } + else { + if (istext && + ((data[in] == '\n') || + ((in+1 < datalen) && (data[in] == '\r') && + (data[in+1] == '\n')))) + { + linelen = 0; + /* Protect against whitespace on end of line */ + if (in && ((data[in-1] == ' ') || (data[in-1] == '\t'))) + odatalen += 2; + if (crlf) + odatalen += 2; + else + odatalen += 1; + if (data[in] == '\r') + in += 2; + else + in++; + } + else { + if ((in + 1 != datalen) && + (data[in+1] != '\n') && + (linelen + 1) >= MAXLINESIZE) { + linelen = 0; + if (crlf) + odatalen += 3; + else + odatalen += 2; + } + linelen++; + odatalen++; + in++; + } + } + } + + odata = (char *) calloc(1, odatalen); + + if (odata == NULL) { + PyErr_NoMemory(); + return NULL; + } + + in = out = linelen = 0; + while (in < datalen) { + if ((data[in] > 126) || + (data[in] == '=') || + (header && data[in] == '_') || + ((data[in] == '.') && (linelen == 1)) || + (!istext && ((data[in] == '\r') || (data[in] == '\n'))) || + ((data[in] == '\t' || data[in] == ' ') && (in + 1 == datalen)) || + ((data[in] < 33) && + (data[in] != '\r') && (data[in] != '\n') && + (quotetabs && ((data[in] != '\t') || (data[in] != ' '))))) + { + if ((linelen + 3 )>= MAXLINESIZE) { + odata[out++] = '='; + if (crlf) odata[out++] = '\r'; + odata[out++] = '\n'; + linelen = 0; + } + odata[out++] = '='; + to_hex(data[in], &odata[out]); + out += 2; + in++; + linelen += 3; + } + else { + if (istext && + ((data[in] == '\n') || + ((in+1 < datalen) && (data[in] == '\r') && + (data[in+1] == '\n')))) + { + linelen = 0; + /* Protect against whitespace on end of line */ + if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) { + ch = odata[out-1]; + odata[out-1] = '='; + to_hex(ch, &odata[out]); + out += 2; + } + + if (crlf) odata[out++] = '\r'; + odata[out++] = '\n'; + if (data[in] == '\r') + in += 2; + else + in++; + } + else { + if ((in + 1 != datalen) && + (data[in+1] != '\n') && + (linelen + 1) >= MAXLINESIZE) { + odata[out++] = '='; + if (crlf) odata[out++] = '\r'; + odata[out++] = '\n'; + linelen = 0; + } + linelen++; + if (header && data[in] == ' ') { + odata[out++] = '_'; + in++; + } + else { + odata[out++] = data[in++]; + } + } + } + } + if ((rv = PyString_FromStringAndSize(odata, out)) == NULL) { + free (odata); + return NULL; + } + free (odata); + return rv; +} /* List of functions defined in the module */ @@ -990,6 +1282,10 @@ static struct PyMethodDef binascii_module_methods[] = { doc_rledecode_hqx}, {"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx}, {"crc32", binascii_crc32, METH_VARARGS, doc_crc32}, + {"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS | METH_KEYWORDS, + doc_a2b_qp}, + {"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS | METH_KEYWORDS, + doc_b2a_qp}, {NULL, NULL} /* sentinel */ }; |