From d2171d2ba414def2ecf27b694ea27c2e9fde0fcf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20v=2E=20L=C3=B6wis?= <martin@v.loewis.de>
Date: Thu, 6 Nov 2003 20:47:57 +0000
Subject: Overallocate target buffer for normalization earlier. Fixes
 #834676. Backported to 2.3.

---
 Lib/test/test_normalization.py |  3 +++
 Modules/unicodedata.c          | 12 +++++++-----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/Lib/test/test_normalization.py b/Lib/test/test_normalization.py
index 046dca6..0cbc2b4 100644
--- a/Lib/test/test_normalization.py
+++ b/Lib/test/test_normalization.py
@@ -84,5 +84,8 @@ def test_main():
             continue
         assert X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c
 
+    # Check for bug 834676
+    normalize('NFC',u'\ud55c\uae00')
+
 if __name__ == "__main__":
     test_main()
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
index d266ad7..311db29 100644
--- a/Modules/unicodedata.c
+++ b/Modules/unicodedata.c
@@ -311,12 +311,14 @@ nfd_nfkd(PyObject *input, int k)
         stack[stackptr++] = *i++;
         while(stackptr) {
             Py_UNICODE code = stack[--stackptr];
-            if (!space) {
-                space = PyString_GET_SIZE(result) + 10;
-                if (PyUnicode_Resize(&result, space) == -1)
+            /* Hangul Decomposition adds three characters in
+               a single step, so we need at least that much room. */
+            if (space < 3) {
+                int newsize = PyString_GET_SIZE(result) + 10;
+                space += 10;
+                if (PyUnicode_Resize(&result, newsize) == -1)
                     return NULL;
-                o = PyUnicode_AS_UNICODE(result) + space - 10;
-                space = 10;
+                o = PyUnicode_AS_UNICODE(result) + newsize - space;
             }
             /* Hangul Decomposition. */
             if (SBase <= code && code < (SBase+SCount)) {
-- 
cgit v0.12