summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTim Peters <tim.peters@gmail.com>2002-04-19 18:41:46 (GMT)
committerTim Peters <tim.peters@gmail.com>2002-04-19 18:41:46 (GMT)
commit4f109c1cf99682bcc23aec3a3bf17fcfa3abd7ac (patch)
tree1f26ab46c1f0fa7b232d601c460c42ad85f9aac2
parente6b63e685b96e8f3a4f6f5f2ac99e0c924025dbf (diff)
downloadcpython-4f109c1cf99682bcc23aec3a3bf17fcfa3abd7ac.zip
cpython-4f109c1cf99682bcc23aec3a3bf17fcfa3abd7ac.tar.gz
cpython-4f109c1cf99682bcc23aec3a3bf17fcfa3abd7ac.tar.bz2
Added a stop-list to reduce the size of the full text search index. Fred,
populate the "stop_list" triple-quoted string with your favorite handful of stop words.
-rw-r--r--Doc/tools/prechm.py32
1 files changed, 29 insertions, 3 deletions
diff --git a/Doc/tools/prechm.py b/Doc/tools/prechm.py
index 9c94d8c..2ce85c4 100644
--- a/Doc/tools/prechm.py
+++ b/Doc/tools/prechm.py
@@ -1,4 +1,4 @@
-'''
+"""
Makes the necessary files to convert from plain html of
Python 1.5 and 1.5.x Documentation to
Microsoft HTML Help format version 1.1
@@ -13,7 +13,7 @@
project, 19-Apr-2002 by Tim Peters. Assorted modifications by Tim
and Fred Drake. Obtained from Robin Dunn's .chm packaging of the
Python 2.2 docs, at <http://alldunn.com/python/>.
-'''
+"""
import sys
import os
@@ -38,12 +38,12 @@ Usage: make_chm.py [-c] [-k] [-p] [-v 1.5[.x]] filename
# user-visible features (visible buttons, tabs, etc).
project_template = '''
[OPTIONS]
-Compatibility=1.1
Compiled file=%(arch)s.chm
Contents file=%(arch)s.hhc
Default Window=%(arch)s
Default topic=index.html
Display compile progress=No
+Full text search stop list file=%(arch)s.stp
Full-text search=Yes
Index file=%(arch)s.hhk
Language=0x409
@@ -80,6 +80,23 @@ object_sitemap = '''
</OBJECT>
'''
+
+# List of words the full text search facility shouldn't index. This
+# becomes file ARCH.stp. Note that this list must be pretty small!
+# Different versions of the MS docs claim the file has a maximum size of
+# 256 or 512 bytes (including \r\n at the end of each line).
+# Note that "and", "or", "not" and "near" are operators in the search
+# language, so there's no point indexing them even if we wanted to.
+stop_list = '''
+a an and
+is
+near
+not
+of
+or
+the
+'''
+
# Library Doc list of tuples:
# each 'book' : ( Dir, Title, First page, Content page, Index page)
#
@@ -335,6 +352,15 @@ def do_it(args = None) :
library = supported_libraries[ version ]
if not (('-p','') in optlist) :
+ fname = arch + '.stp'
+ f = openfile(fname)
+ print "Building stoplist", fname, "..."
+ words = stop_list.split()
+ words.sort()
+ for word in words:
+ print >> f, word
+ f.close()
+
f = openfile(arch + '.hhp')
print "Building Project..."
do_project(library, f, arch, version)