summary | refs | log | tree | commit | diff | stats
path: root/Doc
diff options
context:
space:
mode:
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>, 2018-10-08 21:26:44 (GMT)
committer: GitHub <noreply@github.com>, 2018-10-08 21:26:44 (GMT)
commit: 64bcedce8d61e1daa9ff7980cc07988574049b1f (patch)
tree: 6402cc53df50012609da560374b344a166eba7ad /Doc
parent: 177254c96f9258a62e3e571c2aee0b642070a374 (diff)
download: cpython-64bcedce8d61e1daa9ff7980cc07988574049b1f.zip
cpython-64bcedce8d61e1daa9ff7980cc07988574049b1f.tar.gz
cpython-64bcedce8d61e1daa9ff7980cc07988574049b1f.tar.bz2
bpo-32174: Let .chm document display non-ASCII characters properly (GH-9758)
Let .chm document display non-ASCII characters properly

Escape the `body` part of .chm source file to 7-bit ASCII, to fix visual effect on some MBCS Windows systems.
(cherry picked from commit 6261ae9b01fb8429b779169f8de37ff567c144e8)

Co-authored-by: animalize <animalize@users.noreply.github.com>
Diffstat (limited to 'Doc')
-rw-r--r-- Doc/conf.py | 2
-rw-r--r-- Doc/tools/extensions/escape4chm.py | 39
2 files changed, 40 insertions, 1 deletions
diff --git a/Doc/conf.py b/Doc/conf.py
index 43826ec..e2758bc 100644
--- a/Doc/conf.py
+++ b/Doc/conf.py
@@ -13,7 +13,7 @@ sys.path.append(os.path.abspath('tools/extensions'))
# ---------------------
extensions = ['sphinx.ext.coverage', 'sphinx.ext.doctest',
- 'pyspecific', 'c_annotations']
+ 'pyspecific', 'c_annotations', 'escape4chm']
# General substitutions.
project = 'Python'
diff --git a/Doc/tools/extensions/escape4chm.py b/Doc/tools/extensions/escape4chm.py
new file mode 100644
index 0000000..6f2e357
--- /dev/null
+++ b/Doc/tools/extensions/escape4chm.py
@@ -0,0 +1,39 @@
+"""
+Escape the `body` part of .chm source file to 7-bit ASCII, to fix visual
+effect on some MBCS Windows systems.
+
+https://bugs.python.org/issue32174
+"""
+
+import re
+from html.entities import codepoint2name
+
+# escape the characters which codepoint > 0x7F
+def _process(string):
+ def escape(matchobj):
+ codepoint = ord(matchobj.group(0))
+
+ name = codepoint2name.get(codepoint)
+ if name is None:
+ return '&#%d;' % codepoint
+ else:
+ return '&%s;' % name
+
+ return re.sub(r'[^\x00-\x7F]', escape, string)
+
+def escape_for_chm(app, pagename, templatename, context, doctree):
+ # only works for .chm output
+ if not hasattr(app.builder, 'name') or app.builder.name != 'htmlhelp':
+ return
+
+ # escape the `body` part to 7-bit ASCII
+ body = context.get('body')
+ if body is not None:
+ context['body'] = _process(body)
+
+def setup(app):
+ # `html-page-context` event emitted when the HTML builder has
+ # created a context dictionary to render a template with.
+ app.connect('html-page-context', escape_for_chm)
+
+ return {'version': '1.0', 'parallel_read_safe': True}