diff options
author | animalize <animalize@users.noreply.github.com> | 2018-10-08 21:20:54 (GMT) |
---|---|---|
committer | Steve Dower <steve.dower@microsoft.com> | 2018-10-08 21:20:54 (GMT) |
commit | 6261ae9b01fb8429b779169f8de37ff567c144e8 (patch) | |
tree | aefaf4d450ab64edbbd8a0566e2801cd58f82182 | |
parent | 60d230c78f1e46832fded8b3a8ee604aafa5cc11 (diff) | |
download | cpython-6261ae9b01fb8429b779169f8de37ff567c144e8.zip cpython-6261ae9b01fb8429b779169f8de37ff567c144e8.tar.gz cpython-6261ae9b01fb8429b779169f8de37ff567c144e8.tar.bz2 |
bpo-32174: Let .chm document display non-ASCII characters properly (GH-9758)
Let .chm document display non-ASCII characters properly
Escape the `body` part of .chm source file to 7-bit ASCII, to fix visual effect on some MBCS Windows systems.
-rw-r--r-- | Doc/conf.py | 2 | ||||
-rw-r--r-- | Doc/tools/extensions/escape4chm.py | 39 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Documentation/2018-10-08-19-15-28.bpo-32174.YO9CYm.rst | 2 |
3 files changed, 42 insertions, 1 deletions
```diff
diff --git a/Doc/conf.py b/Doc/conf.py
index d8efce0..7f720ce 100644
--- a/Doc/conf.py
+++ b/Doc/conf.py
@@ -14,7 +14,7 @@ sys.path.append(os.path.abspath('includes'))
 # ---------------------
 
 extensions = ['sphinx.ext.coverage', 'sphinx.ext.doctest',
-              'pyspecific', 'c_annotations']
+              'pyspecific', 'c_annotations', 'escape4chm']
 
 # General substitutions.
 project = 'Python'
diff --git a/Doc/tools/extensions/escape4chm.py b/Doc/tools/extensions/escape4chm.py
new file mode 100644
index 0000000..6f2e357
--- /dev/null
+++ b/Doc/tools/extensions/escape4chm.py
@@ -0,0 +1,39 @@
+"""
+Escape the `body` part of .chm source file to 7-bit ASCII, to fix visual
+effect on some MBCS Windows systems.
+
+https://bugs.python.org/issue32174
+"""
+
+import re
+from html.entities import codepoint2name
+
+# escape the characters which codepoint > 0x7F
+def _process(string):
+    def escape(matchobj):
+        codepoint = ord(matchobj.group(0))
+
+        name = codepoint2name.get(codepoint)
+        if name is None:
+            return '&#%d;' % codepoint
+        else:
+            return '&%s;' % name
+
+    return re.sub(r'[^\x00-\x7F]', escape, string)
+
+def escape_for_chm(app, pagename, templatename, context, doctree):
+    # only works for .chm output
+    if not hasattr(app.builder, 'name') or app.builder.name != 'htmlhelp':
+        return
+
+    # escape the `body` part to 7-bit ASCII
+    body = context.get('body')
+    if body is not None:
+        context['body'] = _process(body)
+
+def setup(app):
+    # `html-page-context` event emitted when the HTML builder has
+    # created a context dictionary to render a template with.
+    app.connect('html-page-context', escape_for_chm)
+
+    return {'version': '1.0', 'parallel_read_safe': True}
diff --git a/Misc/NEWS.d/next/Documentation/2018-10-08-19-15-28.bpo-32174.YO9CYm.rst b/Misc/NEWS.d/next/Documentation/2018-10-08-19-15-28.bpo-32174.YO9CYm.rst
new file mode 100644
index 0000000..a11a4b3
--- /dev/null
+++ b/Misc/NEWS.d/next/Documentation/2018-10-08-19-15-28.bpo-32174.YO9CYm.rst
@@ -0,0 +1,2 @@
+chm document displays non-ASCII charaters properly on some MBCS Windows
+systems.
```