summaryrefslogtreecommitdiffstats
path: root/PCbuild/get_external.py
blob: 27fbc311bbc1d693cbb9e681b7af0e8e8f2a783f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
#!/usr/bin/env python3

import argparse
import os
import pathlib
import platform
import sys
import tarfile
import time
import urllib.error
import urllib.request
import zipfile


def retrieve_with_retries(download_location, output_path, reporthook,
                          max_retries=7):
    """Download a file to disk, retrying transient failures with backoff.

    Args:
        download_location: URL to fetch.
        output_path: Destination path handed to ``urlretrieve``.
        reporthook: Progress callback forwarded to ``urlretrieve``
            (``None`` disables progress reporting).
        max_retries: Number of retries after the first attempt, so at most
            ``max_retries + 1`` requests are made.

    Returns:
        The ``(filename, headers)`` tuple returned by
        ``urllib.request.urlretrieve``.

    Raises:
        OSError: If the server reports the resource as missing (HTTP 404 or
            410) or if every attempt failed.  The underlying exception is
            chained as ``__cause__``.
    """
    failure = f'Download from {download_location} failed.'
    for attempt in range(max_retries + 1):
        try:
            return urllib.request.urlretrieve(
                download_location,
                output_path,
                reporthook=reporthook,
            )
        except (urllib.error.URLError, ConnectionError) as ex:
            # HTTPError is a URLError subclass.  A missing resource will not
            # appear on retry, so fail right away instead of sleeping through
            # the whole backoff schedule; callers that fall back to another
            # URL would otherwise be stalled for minutes.
            missing = (isinstance(ex, urllib.error.HTTPError)
                       and ex.code in (404, 410))
            if missing or attempt == max_retries:
                raise OSError(failure) from ex
            # Exponential backoff: 1s, 2.25s, ~5s, ~11s, ...
            time.sleep(2.25**attempt)

def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False,
              verbose=False):
    """Download the GitHub zip archive of a dependency into *zip_dir*.

    Args:
        commit_hash: Ref (commit, tag or branch name) placed in the archive
            URL and used as the stem of the saved ``.zip`` file.
        zip_dir: ``pathlib.Path`` directory for the download; created if
            it does not exist.
        org: GitHub organization that owns the deps repositories.
        binary: Fetch from ``cpython-bin-deps`` when true, otherwise from
            ``cpython-source-deps``.
        verbose: Print download progress when true.  Defaults to ``False``,
            matching ``fetch_release``.

    Returns:
        The filename reported by ``retrieve_with_retries`` for the saved
        archive.
    """
    repo = 'cpython-bin-deps' if binary else 'cpython-source-deps'
    url = f'https://github.com/{org}/{repo}/archive/{commit_hash}.zip'
    # print doubles as the progress hook: it simply echoes whatever
    # arguments the downloader passes to it.
    reporthook = print if verbose else None
    zip_dir.mkdir(parents=True, exist_ok=True)
    filename, _headers = retrieve_with_retries(
        url,
        zip_dir / f'{commit_hash}.zip',
        reporthook,
    )
    return filename


def fetch_release(tag, tarball_dir, *, org='python', verbose=False):
    """Download the release tarball for *tag* from ``cpython-bin-deps``.

    An architecture-specific asset (``{tag}-{arch}.tar.xz``) is tried
    first; if that download fails, the generic ``{tag}.tar.xz`` asset is
    fetched instead.

    The architecture comes from the ``PreferredToolArchitecture``
    environment variable when it is set and non-empty, otherwise from
    ``platform.machine()``.  ``x86``/``x64``/``arm64`` are matched
    case-insensitively and mapped to the same ``AMD64``/``ARM64`` spellings
    produced when the variable is unset; any other value is used verbatim.

    Args:
        tag: Release tag; also the stem of the asset filename.
        tarball_dir: ``pathlib.Path`` directory for the download; created
            if it does not exist.
        org: GitHub organization that owns the deps repository.
        verbose: Print download progress and the fallback notice when true.

    Returns:
        Path of the downloaded tarball inside *tarball_dir*.

    Raises:
        OSError: If the generic download fails as well.
    """
    arch = os.environ.get('PreferredToolArchitecture')
    if not arch:
        arch = 'ARM64' if platform.machine() == 'ARM64' else 'AMD64'
    elif arch.lower() in ('x86', 'x64'):
        arch = 'AMD64'
    elif arch.lower() == 'arm64':
        # Asset URLs are built from this value verbatim, so normalise the
        # spelling to the one used when the variable is unset.
        arch = 'ARM64'

    reporthook = print if verbose else None
    tarball_dir.mkdir(parents=True, exist_ok=True)
    base_url = f'https://github.com/{org}/cpython-bin-deps/releases/download/{tag}'

    arch_filename = f'{tag}-{arch}.tar.xz'
    try:
        output_path = tarball_dir / arch_filename
        retrieve_with_retries(
            f'{base_url}/{arch_filename}', output_path, reporthook)
        return output_path
    except OSError:
        # No architecture-specific asset could be downloaded; fall through
        # to the generic one.
        if verbose:
            print(f'{arch_filename} not found, trying generic binary...')

    generic_filename = f'{tag}.tar.xz'
    output_path = tarball_dir / generic_filename
    retrieve_with_retries(
        f'{base_url}/{generic_filename}', output_path, reporthook)
    return output_path


def extract_tarball(externals_dir, tarball_path, tag):
    output_path = externals_dir / tag
    with tarfile.open(tarball_path) as tf:
        tf.extractall(os.fspath(externals_dir), filter='data')
    return output_path


def extract_zip(externals_dir, zip_path):
    """Unpack *zip_path* into *externals_dir*.

    Returns the path, under *externals_dir*, of the leading path component
    of the archive's first member.
    """
    with zipfile.ZipFile(os.fspath(zip_path)) as archive:
        archive.extractall(os.fspath(externals_dir))
        first_member = archive.namelist()[0]
    top_level, _sep, _rest = first_member.partition('/')
    return externals_dir / top_level


def parse_args():
    """Parse the command line and return the resulting namespace.

    The namespace carries ``verbose``, ``binary``, ``release``,
    ``organization``, ``externals_dir`` and the positional ``tag``.
    """
    # Default location: the "externals" directory next to this file's
    # parent directory.
    default_externals = pathlib.Path(__file__).parent.parent / 'externals'

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
    )
    parser.add_argument(
        '-b', '--binary',
        action='store_true',
        help='Is the dependency in the binary repo?',
    )
    parser.add_argument(
        '-r', '--release',
        action='store_true',
        help='Download from GitHub release assets instead of branch',
    )
    parser.add_argument(
        '-O', '--organization',
        default='python',
        help='Organization owning the deps repos',
    )
    parser.add_argument(
        '-e', '--externals-dir',
        type=pathlib.Path,
        default=default_externals,
        help='Directory in which to store dependencies',
    )
    parser.add_argument(
        'tag',
        help='tag of the dependency',
    )
    return parser.parse_args()


def main():
    """Fetch and unpack one external dependency into the externals directory.

    Does nothing when ``<externals_dir>/<tag>`` already exists.  Otherwise
    the dependency is downloaded (release tarball with ``--release``, zip
    archive otherwise), extracted, and renamed to ``<externals_dir>/<tag>``
    if extraction produced a different path.  Exits with status 1 when the
    rename keeps failing with ``PermissionError``.
    """
    args = parse_args()
    final_name = args.externals_dir / args.tag

    # Already present (downloaded earlier, or checked into the tree):
    # nothing to fetch.
    if final_name.exists():
        if args.verbose:
            print(f'{args.tag} already exists at {final_name}, skipping download.')
        return

    if not args.release:
        # Zip archive from cpython-bin-deps (--binary) or
        # cpython-source-deps.
        archive = fetch_zip(
            args.tag,
            args.externals_dir / 'zips',
            org=args.organization,
            binary=args.binary,
            verbose=args.verbose,
        )
        extracted = extract_zip(args.externals_dir, archive)
    else:
        # Release assets are used for large dependencies (like LLVM).
        archive = fetch_release(
            args.tag,
            args.externals_dir / 'tarballs',
            org=args.organization,
            verbose=args.verbose,
        )
        extracted = extract_tarball(args.externals_dir, archive, args.tag)

    if extracted == final_name:
        return

    # Retry the rename with growing delays; the trailing 0 marks the final
    # attempt, after which no further retry is announced.
    for wait in (1, 2, 3, 5, 8, 0):
        try:
            extracted.replace(final_name)
        except PermissionError as ex:
            if wait:
                retry = f" Retrying in {wait}s..."
            else:
                retry = ""
            print(f"Encountered permission error '{ex}'.{retry}", file=sys.stderr)
            time.sleep(wait)
        else:
            return

    print(
        f"ERROR: Failed to rename {extracted} to {final_name}.",
        "You may need to restart your build",
        file=sys.stderr,
    )
    sys.exit(1)


# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()