diff options
author | Fred Drake <fdrake@acm.org> | 1998-03-10 14:02:35 (GMT) |
---|---|---|
committer | Fred Drake <fdrake@acm.org> | 1998-03-10 14:02:35 (GMT) |
commit | ac77b79df6f1c2350d83ada04e6a5020169c7f61 (patch) | |
tree | 0866bea8ebba94214a2dd3c3da9b3a93494a404a /Doc/tools/toc2bkm.py | |
parent | 45e564dd78d3cd90ffbdf7f8e44f74dd40ee27b9 (diff) | |
download | cpython-ac77b79df6f1c2350d83ada04e6a5020169c7f61.zip cpython-ac77b79df6f1c2350d83ada04e6a5020169c7f61.tar.gz cpython-ac77b79df6f1c2350d83ada04e6a5020169c7f61.tar.bz2 |
clean_title(): Clean a little more carefully. Still does funny things with
underscores. Might this be a fundamental PDF limitation? Hm, could
still be a TeX thing.
Diffstat (limited to 'Doc/tools/toc2bkm.py')
-rwxr-xr-x | Doc/tools/toc2bkm.py | 21 |
1 file changed, 15 insertions, 6 deletions
```diff
diff --git a/Doc/tools/toc2bkm.py b/Doc/tools/toc2bkm.py
index 2b96312..616b79a 100755
--- a/Doc/tools/toc2bkm.py
+++ b/Doc/tools/toc2bkm.py
@@ -73,19 +73,28 @@ def parse_toc(fp, bigpart=None):
     return top
 
 
-hackscore_rx = re.compile(r"\\(hackscore|raisebox)\s*{[^}]*}")
-title_rx = re.compile(r"\\[a-zA-Z]+\s*")
+hackscore_rx = re.compile(r"\\hackscore\s*{[^}]*}")
+raisebox_rx = re.compile(r"\\raisebox\s*{[^}]*}")
+title_rx = re.compile(r"\\([a-zA-Z])+\s+")
 title_trans = string.maketrans("", "")
 
 def clean_title(title):
-    title = hackscore_rx.sub("_", title)
+    title = raisebox_rx.sub("", title)
+    title = hackscore_rx.sub(r"\\_", title)
+    pos = 0
     while 1:
-        m = title_rx.search(title)
+        m = title_rx.search(title, pos)
         if m:
-            title = title[:m.start()] + title[m.end():]
+            start = m.start()
+            print "found", `title[start:m.end()]`
+            if title[start:start+15] != "\\textunderscore":
+                title = title[:start] + title[m.end():]
+            pos = start + 1
         else:
             break
-    return string.translate(title, title_trans, "{}")
+    title = string.translate(title, title_trans, "{}")
+    print `title`
+    return title
 
 
 def write_toc(toc, fp):
```