├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── bazel-cc-sysroot-generator ├── examples ├── macos-config.toml ├── minimal-config.toml ├── ppa-config.toml └── sysroot-config.toml └── reduce-llvm-toolchain /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | macos_build: 11 | runs-on: macos-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | - run: ./bazel-cc-sysroot-generator --config examples/macos-config.toml 15 | - run: ./bazel-cc-sysroot-generator --config examples/sysroot-config.toml 16 | - run: ./bazel-cc-sysroot-generator --config examples/minimal-config.toml 17 | - run: ./bazel-cc-sysroot-generator --config examples/ppa-config.toml 18 | 19 | ubuntu_build: 20 | runs-on: ubuntu-latest 21 | steps: 22 | - uses: actions/checkout@v3 23 | - run: ./bazel-cc-sysroot-generator --config examples/sysroot-config.toml 24 | - run: ./bazel-cc-sysroot-generator --config examples/minimal-config.toml 25 | - run: ./bazel-cc-sysroot-generator --config examples/ppa-config.toml 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /sysroot-* 2 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | rev: v0.11.12 4 | hooks: 5 | - id: ruff 6 | args: [--fix] 7 | - id: ruff-format 8 | - repo: https://github.com/pre-commit/mirrors-mypy 9 | rev: v1.16.0 10 | hooks: 11 | - id: mypy 12 | args: [--strict, --scripts-are-modules] 13 | additional_dependencies: 14 | - types-toml 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2025 Keith Smiley (http://keith.so) 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the 'Software'), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bazel-cc-sysroot-generator 2 | 3 | This is a CLI for generating Ubuntu and macOS hermetic sysroots for use 4 | with bazel. 5 | 6 | ## Usage 7 | 8 | ### Prerequisites 9 | 10 | Please make sure you have at least Python 3.12, `tar`, `zstd`, and `xz-utils` installed. 11 | 12 | ### Generate sysroots 13 | 14 | Create a config file named `sysroot-config.toml` (or `.json`) to define 15 | your sysroots: 16 | 17 | ```toml 18 | [[platforms]] 19 | os = "jammy" 20 | archs = ["aarch64", "x86_64"] 21 | packages = [ 22 | "libstdc++-11-dev", 23 | "libstdc++6", 24 | ] 25 | deleted_patterns = [ 26 | "etc", 27 | "usr/bin", 28 | ] 29 | 30 | [[platforms]] 31 | os = "macos" 32 | deleted_patterns = [ 33 | "*Swift*", 34 | "*iOSSupport*", 35 | "usr/share", 36 | "usr/libexec", 37 | ] 38 | ``` 39 | 40 | For Ubuntu you can optionally specify repositories to pull from: 41 | 42 | ```toml 43 | [[platforms]] 44 | os = "jammy" 45 | repositories = ["main", "universe"] 46 | ... 47 | ``` 48 | 49 | Run `./bazel-cc-sysroot-generator` (optionally passing `--config 50 | path/to/sysroot-config.toml`). 51 | 52 | This example config generates 3 sysroots in the current directory. 53 | 54 | > [!NOTE] 55 | > macOS sysroots can only be generated on macOS and copy the 56 | > currently selected SDK directory. 57 | 58 | ### Include in bazel 59 | 60 | Once you have generated the sysroots, you can upload them somewhere and 61 | reference them in bazel with something like: 62 | 63 | ```bzl 64 | # MODULE.bazel 65 | bazel_dep(name = "sysroot-jammy-x86_64") 66 | archive_override( 67 | module_name = "sysroot-jammy-x86_64", 68 | integrity = "...", 69 | urls = [ 70 | "https://...", 71 | ], 72 | ) 73 | ``` 74 | 75 | And use them with a hermetic CC toolchain such as 76 | [toolchains_llvm](https://github.com/bazel-contrib/toolchains_llvm) with 77 | something like: 78 | 79 | ```bzl 80 | # MODULE.bazel 81 | llvm = use_extension("@toolchains_llvm//toolchain/extensions:llvm.bzl", "llvm") 82 | llvm.toolchain( 83 | name = "llvm_toolchain", 84 | llvm_versions = {"": "19.1.9"}, 85 | stdlib = {"": "dynamic-stdc++"}, 86 | ) 87 | 88 | llvm.sysroot( 89 | name = "llvm_toolchain", 90 | label = "@sysroot-jammy-x86_64", 91 | targets = ["linux-x86_64"], 92 | ) 93 | use_repo(llvm, "llvm_toolchain") 94 | 95 | register_toolchains("@llvm_toolchain//:all") 96 | ``` 97 | 98 | Optionally you can disable bazel's builtin CC toolchain to make sure 99 | it's an error if you accidentally use it by adding this to your 100 | `.bazelrc`: 101 | 102 | ``` 103 | common --repo_env=BAZEL_DO_NOT_DETECT_CPP_TOOLCHAIN=1 104 | ``` 105 | 106 | > [!TIP] 107 | > While iterating on your required packages, you can use 108 | > `--override_module=sysroot-jammy-x86_64=path/to/sysroot-jammy-x86_64` 109 | -------------------------------------------------------------------------------- /bazel-cc-sysroot-generator: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from pathlib import Path 4 | from typing import Any, Generator 5 | import argparse 6 | import collections 7 | import contextlib 8 | import fnmatch 9 | import gzip 10 | import json 11 | import multiprocessing.pool 12 | import os 13 | import shutil 14 | import subprocess 15 | import sys 16 | import tarfile 17 | import tempfile 18 | import urllib.error 19 | import urllib.request 20 | 21 | try: 22 | 
import tomllib as tl 23 | except ImportError: 24 | try: 25 | import toml as tl # type: ignore 26 | except ImportError: 27 | tl = None # type: ignore 28 | 29 | 30 | _SUPPORTED_OS = { 31 | "bionic", 32 | "focal", 33 | "jammy", 34 | "macos", 35 | "noble", 36 | } 37 | 38 | 39 | Arch = collections.namedtuple("Arch", ["id", "ubuntu_id", "ubuntu_mirror"]) 40 | MirrorInfo = collections.namedtuple("MirrorInfo", ["url", "files"]) 41 | 42 | ARM64 = Arch("aarch64", "arm64", "http://ports.ubuntu.com/ubuntu-ports") 43 | X86_64 = Arch("x86_64", "amd64", "http://gb.archive.ubuntu.com/ubuntu") 44 | 45 | _PPA_URL = "http://ppa.launchpad.net" 46 | 47 | 48 | @contextlib.contextmanager 49 | def restore_pwd() -> Generator[Any, Any, Any]: 50 | pwd = os.getcwd() 51 | try: 52 | yield 53 | finally: 54 | os.chdir(pwd) 55 | 56 | 57 | def _get_required(obj: dict[str, Any], key: str) -> Any: 58 | if key not in obj or not obj[key]: 59 | raise SystemExit(f"error: {key} must exist and be non null") 60 | 61 | return obj[key] 62 | 63 | 64 | def _setup_sysroot(suffix: str) -> Path: 65 | sysroot_dir = Path(f"sysroot-{suffix}") 66 | if sysroot_dir.is_dir(): 67 | shutil.rmtree(sysroot_dir) 68 | elif sysroot_dir.exists(): 69 | sysroot_dir.unlink() 70 | if sysroot_dir.exists(): 71 | raise SystemExit( 72 | f"error: failed to remove '{sysroot_dir}', please delete it and re-run" 73 | ) 74 | sysroot_dir.mkdir() 75 | return sysroot_dir 76 | 77 | 78 | def _find_data_archive(dirname: Path) -> Path: 79 | for ext in ("zst", "zstd", "xz"): 80 | path = dirname / f"data.tar.{ext}" 81 | if path.exists(): 82 | return path 83 | raise ValueError(f"error: failed to find data.tar.zst or data.tar.xz in {dirname}") 84 | 85 | 86 | def _download_and_extract_package(name: str, url: str, package_dir: Path) -> None: 87 | name = url.split("/")[-1] 88 | output_path = Path(f"/tmp/{name}.deb") 89 | try: 90 | urllib.request.urlretrieve(url, output_path) 91 | except urllib.error.HTTPError as e: 92 | print( 93 | f"error: failed to download {name} from {url}: {e}", 94 | file=sys.stderr, 95 | ) 96 | raise 97 | with tempfile.TemporaryDirectory() as dirname: 98 | with restore_pwd(): 99 | os.chdir(dirname) 100 | subprocess.check_output(["ar", "x", output_path]) 101 | archive = _find_data_archive(Path(dirname)) 102 | package_dir.mkdir(parents=True, exist_ok=True) 103 | subprocess.check_output( 104 | [ 105 | "tar", 106 | "xf", 107 | archive, 108 | "-C", 109 | package_dir, 110 | ] 111 | ) 112 | 113 | 114 | def _download_package_list( 115 | ubuntu_release: str, repository: str, arch: Arch 116 | ) -> tuple[Path, str]: 117 | package_archive = Path( 118 | f"/tmp/packages-{ubuntu_release}-{repository.replace('/', '_')}-{arch.id}.gz" 119 | ) 120 | mirror_url = arch.ubuntu_mirror 121 | is_ppa_repository = repository.startswith("ppa:") 122 | if is_ppa_repository: 123 | repository = repository.lstrip("ppa:") 124 | mirror_url = f"{_PPA_URL}/{repository}/ubuntu" 125 | if not package_archive.exists(): 126 | print( 127 | f"Downloading package list for {ubuntu_release}-{repository}-{arch.id}..." 
128 | ) 129 | if is_ppa_repository: 130 | package_url = f"{mirror_url}/dists/{ubuntu_release}/main/binary-{arch.ubuntu_id}/Packages.gz" 131 | else: 132 | package_url = f"{mirror_url}/dists/{ubuntu_release}/{repository}/binary-{arch.ubuntu_id}/Packages.gz" 133 | # TODO: progress reporting 134 | urllib.request.urlretrieve(package_url, package_archive) 135 | return package_archive, mirror_url 136 | 137 | 138 | def _download_packages( 139 | ubuntu_release: str, 140 | arch: Arch, 141 | repositories: set[str], 142 | packages: set[str], 143 | sysroot_dir: Path, 144 | ) -> None: 145 | repo_to_mirror_info = {} 146 | for repo in repositories: 147 | package_archive, mirror_url = _download_package_list(ubuntu_release, repo, arch) 148 | with gzip.open(package_archive, "rb") as f: 149 | repo_to_mirror_info[repo] = MirrorInfo( 150 | mirror_url, 151 | [ 152 | x 153 | for x in f.read().decode().splitlines() 154 | if x.startswith("Filename: ") 155 | ], 156 | ) 157 | 158 | package_urls = {} 159 | needed_packages = set(packages) 160 | for repo, mirror_info in repo_to_mirror_info.items(): 161 | for filename in mirror_info.files: 162 | if not needed_packages: 163 | break 164 | for package in needed_packages: 165 | # Filename: pool/main/c/curl/libcurl4-openssl-dev_7.68.0-1ubuntu2_amd64.deb 166 | last_component = filename.split("/")[-1] 167 | if last_component.startswith(f"{package}_"): 168 | package_urls[package] = ( 169 | f"{mirror_info.url}/{filename.split(' ')[-1]}" 170 | ) 171 | needed_packages.remove(package) 172 | break 173 | 174 | if needed_packages: 175 | raise SystemExit( 176 | "Failed to find some packages, please report this issue: {}".format( 177 | " ".join(sorted(needed_packages)) 178 | ) 179 | ) 180 | 181 | pool = multiprocessing.pool.Pool() 182 | results = [] 183 | for name, package_url in package_urls.items(): 184 | results.append( 185 | pool.apply_async( 186 | _download_and_extract_package, 187 | (name, package_url, sysroot_dir.absolute()), 188 | ) 189 | ) 190 | 191 | pool.close() 192 | pool.join() 193 | 194 | for result in results: 195 | if not result.successful(): 196 | raise SystemExit(f"error: {result.get()}") 197 | 198 | 199 | def _cleanup_linux_sysroot( 200 | arch: Arch, 201 | deleted_patterns: list[str], 202 | sysroot_dir: Path, 203 | ) -> None: 204 | broken_libraries_dir = Path("usr") / "lib" / f"{arch.id}-linux-gnu" 205 | destination_dir = Path("lib") / f"{arch.id}-linux-gnu" 206 | 207 | with restore_pwd(): 208 | os.chdir(sysroot_dir.absolute()) 209 | _remove_matching_patterns(Path("."), deleted_patterns) 210 | _fix_package_symlinks(broken_libraries_dir, destination_dir) 211 | _fix_package_symlinks(Path("lib64"), destination_dir) 212 | 213 | _validate_relative_symlinks(sysroot_dir) 214 | 215 | 216 | def _write_bazel_files(sysroot_dir: Path) -> None: 217 | name = sysroot_dir.name 218 | (sysroot_dir / "BUILD.bazel").write_text(f"""\ 219 | load("@bazel_skylib//rules/directory:directory.bzl", "directory") 220 | 221 | directory( 222 | name = "root", 223 | srcs = glob(["**/*"]), 224 | visibility = ["//visibility:public"], 225 | ) 226 | 227 | filegroup( 228 | name = "{name}", 229 | srcs = glob(["**"]), 230 | visibility = ["//visibility:public"], 231 | ) 232 | """) 233 | (sysroot_dir / "MODULE.bazel").write_text(f"""\ 234 | module(name = "{name}") 235 | 236 | bazel_dep(name = "bazel_skylib", version = "1.7.1") 237 | """) 238 | 239 | 240 | def _archive(sysroot_dir: Path) -> None: 241 | # https://stackoverflow.com/questions/1094841/get-a-human-readable-version-of-a-file-size 242 | def 
sizeof_fmt(num: float) -> str: 243 | for unit in ("", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"): 244 | if abs(num) < 1024.0: 245 | return f"{num:3.1f} {unit}B" 246 | num /= 1024.0 247 | return f"{num:.1f} YiB" 248 | 249 | output = Path(f"{sysroot_dir.name}.tar.xz") 250 | output.unlink(missing_ok=True) 251 | with tarfile.open(output, "w:xz") as tar: 252 | tar.add(sysroot_dir, arcname="") 253 | 254 | size = float(output.stat().st_size) 255 | print(f"{output}: {sizeof_fmt(size)}") 256 | 257 | 258 | def _fix_package_symlinks(broken_libraries_dir: Path, destination_dir: Path) -> None: 259 | if not broken_libraries_dir.exists() or not destination_dir.exists(): 260 | return 261 | 262 | relative_root = Path(".") 263 | for _ in range(0, len(broken_libraries_dir.parts)): 264 | relative_root /= ".." 265 | 266 | relative_root /= destination_dir 267 | for lib in broken_libraries_dir.glob("*.so*"): 268 | # Skip normal files 269 | if not lib.is_symlink(): 270 | continue 271 | 272 | # Skip symlinks to relative paths that already exist inside the sysroot 273 | # TODO: this should validate the relative-ness actually lives inside the sysroot 274 | if not lib.readlink().is_absolute(): 275 | continue 276 | 277 | dest = relative_root / lib.readlink().name 278 | lib.unlink() 279 | lib.symlink_to(dest) 280 | 281 | if not lib.exists(): 282 | print( 283 | f"WARNING: deleting dead symlink: {lib} (you might want to install the providing package instead)" 284 | ) 285 | lib.unlink() 286 | 287 | 288 | def _remove_matching_patterns(sysroot: Path, patterns: list[str]) -> None: 289 | def _should_delete(name: Path, patterns: list[str]) -> bool: 290 | return any(fnmatch.fnmatch(str(name), x) for x in patterns) 291 | 292 | for root, dirs, files in os.walk(str(sysroot)): 293 | for dir in dirs: 294 | path = Path(root) / dir 295 | if _should_delete(path, patterns): 296 | shutil.rmtree(path) 297 | 298 | for file in files: 299 | path = Path(root) / file 300 | if _should_delete(path, patterns): 301 | path.unlink() 302 | 303 | 304 | # NOTE: this should be solved by the symlink re-writing, but this is another safety net 305 | def _validate_relative_symlinks(root: Path) -> None: 306 | for dirpath, _, files in os.walk(str(root)): 307 | for file in files: 308 | path = Path(dirpath) / file 309 | if path.is_symlink(): 310 | if path.readlink().is_absolute(): 311 | raise SystemExit(f"{path}: error: expected a relative symlink") 312 | 313 | 314 | def _generate_ubuntu_sysroot(os_name: str, platform: dict[str, Any]) -> None: 315 | packages = set(_get_required(platform, "packages")) 316 | repositories = set(platform.get("repositories") or ["main"]) 317 | deleted_patterns = platform.get("deleted_patterns") or [] 318 | archs = _get_required(platform, "archs") 319 | for arch_str in archs: 320 | if arch_str in ("aarch64", "arm64"): 321 | arch = ARM64 322 | elif arch_str in ("amd64", "x86_64"): 323 | arch = X86_64 324 | else: 325 | raise SystemExit( 326 | f"error: unsupported arch '{arch_str}', valid options: aarch64, x86_64" 327 | ) 328 | 329 | sysroot_dir = _setup_sysroot(f"{os_name}-{arch.id}") 330 | _download_packages(os_name, arch, repositories, packages, sysroot_dir) 331 | _cleanup_linux_sysroot(arch, deleted_patterns, sysroot_dir) 332 | _write_bazel_files(sysroot_dir) 333 | _archive(sysroot_dir) 334 | 335 | 336 | def _generate_macos_sysroot(platform: dict[str, Any]) -> None: 337 | sysroot_dir = _setup_sysroot("macos") 338 | system_sysroot = ( 339 | subprocess.check_output(["xcrun", "--show-sdk-path", "--sdk", "macosx"]) 340 | .decode() 341 | .strip() 342 
| ) 343 | shutil.copytree( 344 | system_sysroot, 345 | sysroot_dir, 346 | symlinks=True, 347 | dirs_exist_ok=True, 348 | ) 349 | 350 | deleted_patterns = platform.get("deleted_patterns") or [] 351 | with restore_pwd(): 352 | os.chdir(sysroot_dir.absolute()) 353 | _remove_matching_patterns(Path("."), deleted_patterns) 354 | 355 | _validate_relative_symlinks(sysroot_dir) 356 | _write_bazel_files(sysroot_dir) 357 | _archive(sysroot_dir) 358 | 359 | 360 | def _main(platforms: list[dict[str, Any]]) -> None: 361 | for platform in platforms: 362 | os_name = _get_required(platform, "os") 363 | if os_name not in _SUPPORTED_OS: 364 | raise SystemExit( 365 | f"error: unsupported os '{os_name}', valid options: {', '.join(sorted(_SUPPORTED_OS))}" 366 | ) 367 | 368 | if os_name == "macos": 369 | _generate_macos_sysroot(platform) 370 | else: 371 | _generate_ubuntu_sysroot(os_name, platform) 372 | 373 | 374 | def _build_parser() -> argparse.ArgumentParser: 375 | parser = argparse.ArgumentParser() 376 | parser.add_argument( 377 | "--config", 378 | help="The config file to read from, defaults to 'sysroot-config.(toml|json)'", 379 | ) 380 | return parser 381 | 382 | 383 | if __name__ == "__main__": 384 | args = _build_parser().parse_args() 385 | config_file = args.config 386 | if not config_file: 387 | if os.path.exists("sysroot-config.json"): 388 | config_file = "sysroot-config.json" 389 | else: 390 | config_file = "sysroot-config.toml" 391 | 392 | if not os.path.exists(config_file): 393 | raise SystemExit( 394 | f"{config_file}: error: file does not exist, use --config to pass another path" 395 | ) 396 | 397 | with open(config_file) as f: 398 | contents = f.read() 399 | try: 400 | config = json.loads(contents) 401 | except json.JSONDecodeError: 402 | if not tl: 403 | raise SystemExit( 404 | "error: to use toml, either use python3.11 or run 'pip3 install toml'. 
Otherwise use json instead" 405 | ) 406 | config = tl.loads(contents) 407 | 408 | platforms = _get_required(config, "platforms") 409 | _main(platforms) 410 | -------------------------------------------------------------------------------- /examples/macos-config.toml: -------------------------------------------------------------------------------- 1 | [[platforms]] 2 | os = "macos" 3 | deleted_patterns = [ 4 | "*.swift*", 5 | "*iOSSupport*", 6 | "*Ruby.framework*", 7 | "*Swift*.framework", 8 | "*SwiftUI*", 9 | "System/Cryptexes", 10 | "System/Library/Perl", 11 | "usr/lib/swift", 12 | "usr/libexec", 13 | "usr/share", 14 | ] 15 | -------------------------------------------------------------------------------- /examples/minimal-config.toml: -------------------------------------------------------------------------------- 1 | [[platforms]] 2 | os = "focal" 3 | repositories = ["main", "universe"] 4 | archs = ["x86_64"] 5 | packages = [ 6 | "libstdc++-10-dev", 7 | "libstdc++6" 8 | ] 9 | deleted_patterns = [ 10 | "etc", 11 | ] 12 | -------------------------------------------------------------------------------- /examples/ppa-config.toml: -------------------------------------------------------------------------------- 1 | [[platforms]] 2 | os = "focal" 3 | repositories = ["main", "ppa:ubuntu-toolchain-r/test"] 4 | archs = ["x86_64"] 5 | packages = [ 6 | "gcc-11", 7 | "g++-11", 8 | "libacl1-dev", 9 | ] 10 | deleted_patterns = [ 11 | "etc", 12 | ] 13 | -------------------------------------------------------------------------------- /examples/sysroot-config.toml: -------------------------------------------------------------------------------- 1 | [[platforms]] 2 | os = "jammy" 3 | archs = ["aarch64", "x86_64"] 4 | packages = [ 5 | "libbsd-dev", 6 | "libbsd0", 7 | "libc6", 8 | "libc6-dev", 9 | "libedit-dev", 10 | "libedit2", 11 | "libgcc-11-dev", 12 | "libgcc-s1", 13 | "libmd-dev", 14 | "libmd0", 15 | "libncurses-dev", 16 | "libncurses6", 17 | "libstdc++-11-dev", 18 | "libstdc++6", 19 | "libtinfo6", 20 | "linux-libc-dev", 21 | "zlib1g-dev", 22 | ] 23 | deleted_patterns = [ 24 | "*.pc", 25 | "*/gconv.h", 26 | "*/libasan*", 27 | "*/libc.a", 28 | "*/libedit.a", 29 | "*/liblsan*", 30 | "*/libm-*.a", 31 | "*/libmd.a", 32 | "*/libpanel.a", 33 | "*/libstdc*.a", 34 | "*/libtinfo.a", 35 | "*/libtsan*", 36 | "*/libubsan*", 37 | "etc", 38 | "usr/bin", 39 | "usr/lib/*-linux-gnu/audit", 40 | "usr/lib/*-linux-gnu/gconv", 41 | "usr/lib/*-linux-gnu/pkgconfig", 42 | "usr/lib/valgrind", 43 | "usr/share", 44 | ] 45 | -------------------------------------------------------------------------------- /reduce-llvm-toolchain: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | from pathlib import Path 4 | import argparse 5 | import shutil 6 | import subprocess 7 | import sys 8 | import tarfile 9 | import urllib.error 10 | import urllib.request 11 | 12 | _KEEP_BINARIES = { 13 | "clang", # NOTE: clang-N is handled below 14 | "clang-tidy", 15 | "lld", 16 | "llvm-ar", 17 | "llvm-as", 18 | "llvm-cov", 19 | "llvm-dwp", 20 | "llvm-nm", 21 | "llvm-objcopy", 22 | "llvm-objdump", 23 | "llvm-profdata", 24 | } 25 | 26 | 27 | def _archive(toolchain: Path) -> None: 28 | # https://stackoverflow.com/questions/1094841/get-a-human-readable-version-of-a-file-size 29 | def sizeof_fmt(num: float) -> str: 30 | for unit in ("", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi"): 31 | if abs(num) < 1024.0: 32 | return f"{num:3.1f} {unit}B" 33 | num /= 1024.0 34 | return f"{num:.1f} YiB" 35 | 36 | 
output = Path(f"{toolchain.name}.tar.xz") 37 | output.unlink(missing_ok=True) 38 | with tarfile.open(output, "w:xz") as tar: 39 | tar.add(toolchain, arcname="") 40 | 41 | size = float(output.stat().st_size) 42 | print(f"{output}: {sizeof_fmt(size)}") 43 | 44 | 45 | def _keep_binary(binary: Path) -> bool: 46 | if binary.name in _KEEP_BINARIES: 47 | return True 48 | if binary.name.startswith("clang-"): 49 | # clang-20 -> 20 50 | # clang-tidy -> tidy 51 | part = binary.name.split("-", 1)[-1] 52 | try: 53 | int(part) 54 | return True 55 | except ValueError: 56 | pass 57 | 58 | return False 59 | 60 | 61 | def _get_strip() -> str: 62 | if strip := shutil.which("llvm-strip"): 63 | return strip 64 | 65 | if config := shutil.which("llvm-config"): 66 | root = Path(subprocess.check_output([config, "--bindir"]).decode().strip()) 67 | if strip := shutil.which("llvm-strip", path=root): 68 | return strip 69 | 70 | if brew := shutil.which("brew"): 71 | try: 72 | root = Path( 73 | subprocess.check_output([brew, "--prefix", "llvm"]).decode().strip() 74 | ) 75 | if strip := shutil.which("llvm-strip", path=root / "bin"): 76 | return strip 77 | except subprocess.CalledProcessError: 78 | pass 79 | 80 | raise SystemExit("llvm-strip not found") 81 | 82 | 83 | def _cleanup_unused_files(toolchain: Path) -> None: 84 | # These headers are if you were linking this copy of llvm 85 | shutil.rmtree(toolchain / "include") 86 | shutil.rmtree(toolchain / "libexec") 87 | # Doesn't exist on macOS 88 | shutil.rmtree(toolchain / "local", ignore_errors=True) 89 | shutil.rmtree(toolchain / "share") 90 | 91 | bin_dir = toolchain / "bin" 92 | for binary in bin_dir.iterdir(): 93 | if binary.is_symlink() or _keep_binary(binary): 94 | continue 95 | 96 | binary.unlink() 97 | 98 | strip = _get_strip() 99 | for binary in bin_dir.iterdir(): 100 | # Remove symlinks that became broken 101 | if not binary.exists(): 102 | binary.unlink() 103 | continue 104 | 105 | if not binary.is_symlink(): 106 | subprocess.check_call([strip, "-s", binary]) 107 | 108 | lib_dir = toolchain / "lib" 109 | for lib in lib_dir.iterdir(): 110 | if lib.name == "clang": 111 | continue 112 | 113 | if lib.is_dir(): 114 | shutil.rmtree(lib) 115 | else: 116 | lib.unlink() 117 | 118 | 119 | def _write_bazel_files(toolchain: Path) -> None: 120 | name = toolchain.name 121 | (toolchain / "BUILD.bazel").write_text("""\ 122 | package(default_visibility = ["//visibility:public"]) 123 | 124 | filegroup( 125 | name = "clang", 126 | srcs = glob(["bin/clang*"]), 127 | ) 128 | 129 | filegroup( 130 | name = "ld", 131 | srcs = glob(["bin/*ld*"]), 132 | ) 133 | 134 | filegroup( 135 | name = "include", 136 | srcs = glob([ 137 | "lib/clang/*/include/**", 138 | "lib/clang/*/share/**/*.txt", # sanitizer default ignore lists 139 | ]), 140 | ) 141 | 142 | filegroup( 143 | name = "bin", 144 | srcs = glob(["bin/**"]), 145 | ) 146 | 147 | filegroup( 148 | name = "lib", 149 | srcs = glob( 150 | [ 151 | "lib/clang/*/lib/**/*.a", # sanitizers 152 | "lib/clang/*/lib/**/*.o", # crtbegin.o 153 | "lib/clang/*/lib/**/*.so", # sanitizers linux 154 | "lib/clang/*/lib/**/*.dylib", # sanitizers macOS 155 | "lib/clang/*/lib/**/*.syms", # sanitizers syms files used during linking 156 | ], 157 | allow_empty = True, 158 | ), 159 | ) 160 | 161 | filegroup( 162 | name = "ar", 163 | srcs = ["bin/llvm-ar"], 164 | ) 165 | 166 | filegroup( 167 | name = "as", 168 | srcs = ["bin/llvm-as"], 169 | ) 170 | 171 | filegroup( 172 | name = "nm", 173 | srcs = ["bin/llvm-nm"], 174 | ) 175 | 176 | filegroup( 177 | name = 
"objcopy", 178 | srcs = ["bin/llvm-objcopy"], 179 | ) 180 | 181 | filegroup( 182 | name = "objdump", 183 | srcs = ["bin/llvm-objdump"], 184 | ) 185 | 186 | filegroup( 187 | name = "profdata", 188 | srcs = ["bin/llvm-profdata"], 189 | ) 190 | 191 | filegroup( 192 | name = "dwp", 193 | srcs = ["bin/llvm-dwp"], 194 | ) 195 | 196 | filegroup( 197 | name = "ranlib", 198 | srcs = [ 199 | "bin/llvm-ar", 200 | "bin/llvm-ranlib", 201 | ], 202 | ) 203 | 204 | filegroup( 205 | name = "strip", 206 | srcs = [ 207 | "bin/llvm-objcopy", 208 | "bin/llvm-strip", 209 | ], 210 | ) 211 | 212 | filegroup( 213 | name = "clang-tidy", 214 | srcs = ["bin/clang-tidy"], 215 | ) 216 | """) 217 | (toolchain / "MODULE.bazel").write_text(f'module(name = "{name}")\n') 218 | 219 | 220 | def _main(toolchain: Path, skip_archive: bool) -> None: 221 | _cleanup_unused_files(toolchain) 222 | _write_bazel_files(toolchain) 223 | if not skip_archive: 224 | _archive(toolchain) 225 | 226 | 227 | def _download(url: str, output: Path) -> None: 228 | name = url.split("/")[-1] 229 | output_archive = Path(f"/tmp/{name}") 230 | if output_archive.exists(): 231 | print(f"Using cached download at {output_archive}") 232 | else: 233 | try: 234 | urllib.request.urlretrieve(url, output_archive) 235 | except urllib.error.HTTPError as e: 236 | print( 237 | f"error: failed to download {name} from {url}: {e}", 238 | file=sys.stderr, 239 | ) 240 | raise 241 | 242 | shutil.rmtree(output, ignore_errors=True) 243 | output.mkdir(parents=True) 244 | subprocess.check_output( 245 | [ 246 | "tar", 247 | "xf", 248 | output_archive, 249 | "--strip-components=1", 250 | "-C", 251 | output, 252 | ] 253 | ) 254 | 255 | 256 | def _build_parser() -> argparse.ArgumentParser: 257 | parser = argparse.ArgumentParser() 258 | parser.add_argument("name") 259 | parser.add_argument("url") 260 | parser.add_argument("--skip-archive", action="store_true") 261 | return parser 262 | 263 | 264 | if __name__ == "__main__": 265 | args = _build_parser().parse_args() 266 | path = Path(f"llvm-{args.name}") 267 | _download(args.url, path) 268 | _main(path, args.skip_archive) 269 | --------------------------------------------------------------------------------