# Doc/tools/undoc_symbols.py

#! /usr/bin/env python

"""\
This script prints out a list of undocumented symbols found in
Python include files, prefixed by their tag kind.

Pass Python's include files to ctags, parse the output into a
dictionary mapping symbol names to tag kinds.

Then, the .tex files from Python docs are read into a giant string.

Finally all symbols not found in the docs are written to standard
output, prefixed with their tag kind.
"""

# Which kinds of tags do we need?  Each letter is a ctags kind code
# (see the table of kinds further down): d = macro definitions,
# p = function prototypes, s = structure names, t = typedefs.
TAG_KINDS = "dpst"

# Doc sections to search: subdirectories of the doc directory whose
# *.tex files are read.
DOCSECTIONS = ["api"]  # alternative: ["api", "ext"]

# Only print symbols starting with one of these prefixes.
# To get all symbols, use an empty tuple or an empty string.
PREFIXES = ("Py", "PY")

# Wildcard pattern, joined onto the include directory, that selects the
# header files passed to ctags.
INCLUDEPATTERN = "*.h"

# end of customization section


# Tested with EXUBERANT CTAGS
# see http://ctags.sourceforge.net
#
# ctags fields are separated by tabs.
# The first field is the name, the last field the type:
# d macro definitions (and #undef names)
# e enumerators
# f function definitions
# g enumeration names
# m class, struct, or union members
# n namespaces
# p function prototypes and declarations
# s structure names
# t typedefs
# u union names
# v variable definitions
# x extern and forward variable declarations

import os, glob, re, sys

def findnames(file, prefixes=()):
    """Parse ctags output into a dictionary mapping symbol names to tag kinds.

    file     -- iterable of ctags output lines (an open pipe, a list of
                strings, ...).  On each line the first whitespace-separated
                field is taken as the symbol name and the last one as the
                tag kind.
    prefixes -- sequence of name prefixes to keep; a single string is
                treated as one prefix.  If empty, every symbol is kept.

    Skipped lines: blank lines, lines starting with '!', and macro
    definitions (kind 'd') whose name ends in '_H' (presumably header
    include guards).  If a name occurs more than once, the kind from the
    last occurrence wins.
    """
    # Normalise once so str.startswith() can test all prefixes in one call.
    # A bare string must be wrapped, otherwise it would be treated as a
    # sequence of one-character prefixes.
    if isinstance(prefixes, str):
        prefixes = (prefixes,)
    else:
        prefixes = tuple(prefixes)

    names = {}
    for line in file:
        fields = line.split()
        # An empty or whitespace-only line has no fields to index into.
        if not fields or line.startswith('!'):
            continue
        name, tag = fields[0], fields[-1]
        if tag == 'd' and name.endswith('_H'):
            continue
        if not prefixes or name.startswith(prefixes):
            names[name] = tag
    return names

def print_undoc_symbols(prefix, docdir, incdir):
    """Print the ctags symbols from incdir that the docs never mention.

    prefix -- prefix filter handed on unchanged to findnames().
    docdir -- directory containing one subdirectory per DOCSECTIONS entry;
              every *.tex file found in those subdirectories is searched.
    incdir -- directory whose INCLUDEPATTERN files are passed to ctags.

    For each symbol (in sorted name order) that does not occur in the docs
    followed by a non-word character, one line "<tag kind> <name>" is
    written to standard output.
    """
    # Read all doc sources into one big string to search in.
    chunks = []
    for sect in DOCSECTIONS:
        for texname in glob.glob(os.path.join(docdir, sect, "*.tex")):
            with open(texname) as texfile:
                chunks.append(texfile.read())
    docs = "\n".join(chunks)

    incfiles = os.path.join(incdir, INCLUDEPATTERN)

    # os.popen() runs the command through the shell, which is what expands
    # the wildcard in incfiles; "-f -" makes ctags write to its stdout.
    command = ("ctags -IPyAPI_FUNC -IPy_GCC_ATTRIBUTE --c-types=%s -f - %s"
               % (TAG_KINDS, incfiles))
    with os.popen(command) as fp:
        symbols = findnames(fp, prefix)

    for name in sorted(symbols):
        # The trailing \W keeps a symbol from matching as the leading part
        # of a longer identifier.
        if not re.search("%s\\W" % re.escape(name), docs):
            # Single pre-formatted argument: prints identically whether
            # print is a statement or a function.
            print("%s %s" % (symbols[name], name))
if __name__ == '__main__':
    # Both trees are located relative to the directory this script was
    # started from: the docs one level up, the headers two levels up
    # under "Include".
    here = os.path.dirname(sys.argv[0])
    docdir = os.path.normpath(os.path.join(here, ".."))
    incdir = os.path.normpath(os.path.join(here, "../../Include"))

    print_undoc_symbols(PREFIXES, docdir, incdir)