summaryrefslogtreecommitdiffstats
path: root/Tools/scripts/stable_abi.py
blob: aa953b2dfde8772f110b37b59de84055c8537069 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
#!/usr/bin/env python

import argparse
import glob
import re
import pathlib
import subprocess
import sys
import sysconfig

# Base names of the headers found directly under "Include" that are skipped
# when the header list for the limited API scan is built.
EXCLUDED_HEADERS = set(
    """
    bytes_methods.h
    cellobject.h
    classobject.h
    code.h
    compile.h
    datetime.h
    dtoa.h
    frameobject.h
    funcobject.h
    genobject.h
    longintrepr.h
    parsetok.h
    pyarena.h
    pyatomic.h
    pyctype.h
    pydebug.h
    pytime.h
    symtable.h
    token.h
    ucnhash.h
    """.split()
)


def get_exported_symbols(library, dynamic=False):
    """Yield the symbol names that ``nm`` lists for *library*.

    ``nm --no-sort`` is run on *library*; when *dynamic* is true the
    ``--dynamic`` flag is added so that only dynamic symbols are looked at.

    This is a generator: ``nm`` is not started until iteration begins.

    If ``nm`` exits with a non-zero status, its captured standard output is
    echoed to stdout and the process exits with that same status.  An
    ``Exception`` is raised when ``nm`` succeeds but prints nothing.
    """
    command = ["nm", "--no-sort"] + (["--dynamic"] if dynamic else []) + [library]
    result = subprocess.run(
        command, stdout=subprocess.PIPE, universal_newlines=True
    )
    if result.returncode:
        sys.stdout.write(result.stdout)
        sys.exit(result.returncode)

    output = result.stdout.rstrip()
    if not output:
        raise Exception("command output is empty")

    for entry in output.splitlines():
        # A symbol line looks like '0000000000001b80 D PyTextIOWrapper_Type'.
        # Blank lines and lines with fewer than three fields are ignored.
        fields = entry.split(maxsplit=2)
        if len(fields) == 3:
            yield fields[2]


def check_library(library, abi_funcs, dynamic=False):
    """Report the stable ABI symbols that *library* does not provide.

    *abi_funcs* is the set of expected symbol names.  The symbols listed for
    *library* are obtained from ``get_exported_symbols(library, dynamic)``.

    Returns 1 after printing the missing names if at least one expected
    symbol is absent, and 0 otherwise.
    """
    available_symbols = set(get_exported_symbols(library, dynamic))
    missing_symbols = abi_funcs - available_symbols
    if missing_symbols:
        # Sets have no defined iteration order, so sort the names to keep
        # the report identical from one run to the next.
        print(
            f"Some symbols from the stable ABI are missing: {', '.join(sorted(missing_symbols))}"
        )
        return 1
    return 0


def generate_limited_api_symbols(args):
    """Write the limited API symbols found in the headers to a file.

    The candidate names come from the non-excluded ``Include/*.h`` headers
    (functions, data, exported data and macro targets).  A candidate is kept
    only if it starts with ``Py`` and is also listed by ``nm`` for the
    library named by the ``LIBRARY`` config variable.  The kept names are
    written, sorted and one per line, to ``args.output_file`` after two
    header comment lines.

    The process exits with status 1 on a debug build (detected through
    ``sys.gettotalrefcount``) and with status 0 once the file is written.
    An ``Exception`` is raised when ``LDLIBRARY`` differs from ``LIBRARY``.
    """
    if hasattr(sys, "gettotalrefcount"):
        print(
            "Stable ABI symbols cannot be generated from a debug build", file=sys.stderr
        )
        sys.exit(1)

    static_library = sysconfig.get_config_var("LIBRARY")
    if sysconfig.get_config_var("LDLIBRARY") != static_library:
        raise Exception("Limited ABI symbols can only be generated from a static build")

    exported = set()
    for name in get_exported_symbols(static_library):
        if name.startswith("Py"):
            exported.add(name)

    headers = []
    for header in pathlib.Path("Include").glob("*.h"):
        if header.name in EXCLUDED_HEADERS:
            continue
        headers.append(header)

    stable_data, stable_exported_data, stable_functions = get_limited_api_definitions(
        headers
    )
    macros = get_limited_api_macros(headers)

    candidates = stable_functions | stable_exported_data | stable_data | macros
    stable_symbols = sorted(
        name for name in candidates if name.startswith("Py") and name in exported
    )

    lines = [
        "# File generated by 'make regen-limited-abi'\n",
        "# This is NOT an authoritative list of stable ABI symbols\n",
    ]
    lines.extend(f"{name}\n" for name in stable_symbols)
    with open(args.output_file, "w") as output_file:
        output_file.writelines(lines)
    sys.exit(0)


def get_limited_api_macros(headers):
    """Return the names that the macros seen in *headers* are defined as.

    The compiler command from the ``CC`` config variable is run in
    preprocess-only mode (``-E``) with ``-dM``, which makes it print the
    macro definitions.  ``Py_LIMITED_API`` is set to a value built from the
    major and minor version of the running interpreter.

    For every ``#define NAME TARGET`` or ``#define NAME(...) TARGET`` line
    in that output whose replacement starts with a word, the ``TARGET``
    word is collected.  The result is a set of those words.
    """
    limited_api_version = sys.version_info.major << 24 | sys.version_info.minor << 16
    command = sysconfig.get_config_var("CC").split()
    command += [
        "-DSIZEOF_WCHAR_T=4",  # The actual value is not important
        f"-DPy_LIMITED_API={limited_api_version}",
        "-I.",
        "-I./Include",
        "-dM",
        "-E",
    ]
    command += [str(header) for header in headers]

    macro_dump = subprocess.check_output(
        command, text=True, stderr=subprocess.DEVNULL
    )

    # Group 1 is the macro name, group 2 the first word of its replacement;
    # only the latter is wanted.
    define_pattern = r"#define (\w+)\s*(?:\(.*?\))?\s+(\w+)"
    return {
        match.group(2) for match in re.finditer(define_pattern, macro_dump)
    }


def get_limited_api_definitions(headers):
    """Extract the limited API functions and data declared in *headers*.

    The compiler command from the ``CC`` config variable preprocesses
    *headers* (``-E``) with ``Py_LIMITED_API`` set to a value built from the
    major and minor version of the running interpreter.  The export macros
    ``PyAPI_FUNC``, ``PyAPI_DATA`` and ``EXPORT_DATA`` are redefined to
    marker names so that they survive preprocessing and can be located in
    the output with regular expressions.

    Returns a ``(stable_data, stable_exported_data, stable_functions)``
    tuple of sets.

    This function does *NOT* extract the macros defined in the limited API.
    """
    limited_api_version = sys.version_info.major << 24 | sys.version_info.minor << 16
    command = sysconfig.get_config_var("CC").split()
    command += [
        # Map the export macros to marker names instead of letting them
        # expand, so the declarations can be found in the output below.
        "-DPyAPI_FUNC=__PyAPI_FUNC",
        "-DPyAPI_DATA=__PyAPI_DATA",
        "-DEXPORT_DATA=__EXPORT_DATA",
        "-D_Py_NO_RETURN=",
        "-DSIZEOF_WCHAR_T=4",  # The actual value is not important
        f"-DPy_LIMITED_API={limited_api_version}",
        "-I.",
        "-I./Include",
        "-E",
    ]
    command += [str(header) for header in headers]

    preprocessed = subprocess.check_output(
        command, text=True, stderr=subprocess.DEVNULL
    )

    function_names = re.findall(r"__PyAPI_FUNC\(.*?\)\s*(.*?)\s*\(", preprocessed)
    exported_data_names = re.findall(r"__EXPORT_DATA\((.*?)\)", preprocessed)
    data_names = re.findall(
        r"__PyAPI_DATA\(.*?\)\s*\(?(.*?)\)?\s*;", preprocessed
    )
    return set(data_names), set(exported_data_names), set(function_names)


def check_symbols(parser_args):
    """Check the built libraries against the symbols in an ABI file.

    ``parser_args.stable_abi_file`` names a text file with one symbol per
    line; empty lines and lines starting with ``#`` are ignored.

    The library named by the ``LIBRARY`` config variable is checked first.
    If ``LDLIBRARY`` names a different file, that one is checked too, using
    its dynamic symbols.  The process then exits with 0 when nothing is
    missing and 1 otherwise.

    An ``Exception`` is raised if either config variable is unset or empty.
    """
    with open(parser_args.stable_abi_file, "r") as abi_file:
        abi_funcs = set()
        for entry in abi_file.read().splitlines():
            if entry and not entry.startswith("#"):
                abi_funcs.add(entry)

    # static library
    static_library = sysconfig.get_config_var("LIBRARY")
    if not static_library:
        raise Exception("failed to get LIBRARY variable from sysconfig")
    status = check_library(static_library, abi_funcs)

    # dynamic library
    shared_library = sysconfig.get_config_var("LDLIBRARY")
    if not shared_library:
        raise Exception("failed to get LDLIBRARY variable from sysconfig")
    if shared_library != static_library:
        status |= check_library(shared_library, abi_funcs, dynamic=True)

    sys.exit(status)


def main():
    """Parse the command line and run the selected sub-command.

    Two sub-commands are available:

    * ``check STABLE_ABI_FILE`` runs ``check_symbols``.
    * ``generate OUTPUT_FILE`` runs ``generate_limited_api_symbols``.

    When no sub-command is given, a usage error is reported.
    """
    parser = argparse.ArgumentParser(
        description="Check or generate the list of stable ABI symbols."
    )
    subparsers = parser.add_subparsers()
    check_parser = subparsers.add_parser(
        "check", help="Check the exported symbols against a given ABI file"
    )
    check_parser.add_argument(
        "stable_abi_file", type=str, help="File with the stable abi functions"
    )
    check_parser.set_defaults(func=check_symbols)
    generate_parser = subparsers.add_parser(
        "generate",
        help="Generate symbols from the header files and the exported symbols",
    )
    generate_parser.add_argument(
        "output_file", type=str, help="File to dump the symbols to"
    )
    generate_parser.set_defaults(func=generate_limited_api_symbols)
    args = parser.parse_args()
    if "func" not in args:
        # "func" is only set by the sub-parsers.  parser.error() prints the
        # usage message and exits with status 2, so nothing after it runs.
        parser.error("Either 'check' or 'generate' must be used")

    args.func(args)


if __name__ == "__main__":
    main()