├── .github └── workflows │ ├── build-glibc-and-release.yaml │ ├── release-all-amazonlinux.yaml │ ├── release-all-debian.yaml │ ├── release-all-rhel.yaml │ ├── release-all-ubuntu.yaml │ ├── release-debian-bullseye.yaml │ ├── release-debian-buster.yaml │ └── release-specific.yaml ├── .gitignore ├── BZ-17645.md ├── BZ-19329.md ├── Dockerfile ├── Dockerfile.al2 ├── Dockerfile.centos ├── Dockerfile.debian ├── Dockerfile.rhel ├── README.md ├── SECURITY.md ├── patches ├── al2 │ └── 2.26 │ │ ├── mw-0001-remove-broken-code-path-for-easier-code-review.patch │ │ ├── mw-0002-Fix-data-races-between-pthread_create-and-dlopen.patch │ │ ├── mw-0003-BZ19329-fixup.patch │ │ ├── mw-0004-Consolidate-link-map-sorting.patch │ │ └── mw-0005-Backport-BZ17645-patch-to-glibc-2.28.patch ├── debian │ ├── 2.24 │ │ ├── unsubmitted-mathworks-bz19329-1-of-2.v2.27.patch │ │ ├── unsubmitted-mathworks-bz19329-2-of-2.v2.27.patch │ │ └── unsubmitted-mathworks-bz19329-fixup.v2.27.patch │ ├── 2.27 │ │ ├── unsubmitted-mathworks-0-bz17645.v2.31.patch │ │ ├── unsubmitted-mathworks-bz19329-1-of-2.v2.27.patch │ │ ├── unsubmitted-mathworks-bz19329-2-of-2.v2.27.patch │ │ └── unsubmitted-mathworks-bz19329-fixup.v2.27.patch │ ├── 2.28 │ ├── 2.31 │ │ ├── unsubmitted-mathworks-0-bz17645.v2.31.patch │ │ ├── unsubmitted-mathworks-bz19329-1-of-2.v2.31.patch │ │ ├── unsubmitted-mathworks-bz19329-2-of-2.v2.31.patch │ │ └── unsubmitted-mathworks-bz19329-fixup.v2.31.patch │ ├── 2.32 │ └── 2.33 └── rhel │ ├── 2.28-189 │ └── unsubmitted-mathworks-0-bz17645.v2.28-rhel.patch │ └── 2.28 │ ├── unsubmitted-mathworks-0-bz17645.v2.28-rhel.patch │ ├── unsubmitted-mathworks-glibc-bz19329-1-of-2.el8.patch │ ├── unsubmitted-mathworks-glibc-bz19329-2-of-2.el8.patch │ └── unsubmitted-mathworks-glibc-bz19329-fixup.el8.patch └── scripts ├── build-glibc-src.sh ├── get-glibc-src.sh ├── patch-glibc-src.sh ├── setup-glibc-build-env-vars.sh ├── update-specfile-al2.sh └── update-specfile.sh /.github/workflows/build-glibc-and-release.yaml: 
-------------------------------------------------------------------------------- 1 | # Copyright 2021 The MathWorks, Inc. 2 | --- 3 | name: "build-glibc-and-release" 4 | 5 | on: 6 | workflow_call: 7 | inputs: 8 | dist-base: 9 | required: true 10 | type: string 11 | dist-tag: 12 | required: true 13 | type: string 14 | prerelease: 15 | required: false 16 | type: boolean 17 | default: false 18 | dockerfile: 19 | required: false 20 | type: string 21 | default: Dockerfile.debian 22 | 23 | jobs: 24 | build: 25 | runs-on: "ubuntu-latest" 26 | 27 | steps: 28 | - name: "Checkout source code" 29 | uses: "actions/checkout@v3" 30 | 31 | - name: "Build" 32 | shell: bash 33 | run: | 34 | DOCKER_BUILDKIT=1 docker build --build-arg DIST_BASE=${{ inputs.dist-base }} --build-arg DIST_TAG=${{ inputs.dist-tag }} -f ${{ inputs.dockerfile }} --output type=local,dest=. . 35 | 36 | - name: "Download glibc license info" 37 | shell: bash 38 | run: | 39 | cd build 40 | wget -q -O COPYING 'https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=COPYING;hb=HEAD' 41 | wget -q -O LICENSES 'https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=LICENSES;hb=HEAD' 42 | 43 | - name: Delete existing release 44 | uses: dev-drprasad/delete-tag-and-release@v0.2.1 45 | with: 46 | delete_release: true 47 | tag_name: ${{ inputs.dist-base }}-${{ inputs.dist-tag }} 48 | env: 49 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 50 | 51 | - name: Upload artifacts 52 | uses: actions/upload-artifact@v4 53 | with: 54 | name: ${{ inputs.dist-base }}-${{ inputs.dist-tag }} 55 | path: | 56 | build/* 57 | 58 | - name: Release artifacts 59 | uses: softprops/action-gh-release@v1 60 | with: 61 | tag_name: ${{ inputs.dist-base }}-${{ inputs.dist-tag }} 62 | name: "Current build for: ${{ inputs.dist-base }}-${{ inputs.dist-tag }}" 63 | body: "" 64 | prerelease: ${{ inputs.prerelease }} 65 | draft: false 66 | generate_release_notes: false 67 | files: | 68 | build/* 69 | 
-------------------------------------------------------------------------------- /.github/workflows/release-all-amazonlinux.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The MathWorks, Inc. 2 | --- 3 | name: "release-all-amazonlinux" 4 | 5 | # Build and release all artifacts is either triggered manually or on the first day of the month to 6 | # ensure all artifacts are up-to-date with security and other patches to these distributions 7 | on: 8 | workflow_dispatch: 9 | schedule: 10 | - cron: '0 9 1 * *' 11 | 12 | # Wish I could use a matrix strategy here - tried to and discovered that there are limitations 13 | # The strategy property is not supported in any job that calls a reusable workflow. 14 | # from https://docs.github.com/en/actions/using-workflows/reusing-workflows#limitations 15 | jobs: 16 | build-amazonlinux-2: 17 | uses: ./.github/workflows/build-glibc-and-release.yaml 18 | with: 19 | dist-base: amazonlinux 20 | dist-tag: 2 21 | dockerfile: Dockerfile.al2 -------------------------------------------------------------------------------- /.github/workflows/release-all-debian.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2021-2022 The MathWorks, Inc. 2 | --- 3 | name: "release-all-debian" 4 | 5 | # Build and release all artifacts is either triggered manually or on the first day of the month to 6 | # ensure all artifacts are up-to-date with security and other patches to these distributions 7 | on: 8 | workflow_dispatch: 9 | 10 | # Wish I could use a matrix strategy here - tried to and discovered that there are limitations 11 | # The strategy property is not supported in any job that calls a reusable workflow. 
12 | # from https://docs.github.com/en/actions/using-workflows/reusing-workflows#limitations 13 | jobs: 14 | build-debian-buster: 15 | uses: ./.github/workflows/build-glibc-and-release.yaml 16 | with: 17 | dist-base: debian 18 | dist-tag: buster 19 | 20 | build-debian-bullseye: 21 | uses: ./.github/workflows/build-glibc-and-release.yaml 22 | with: 23 | dist-base: debian 24 | dist-tag: bullseye 25 | 26 | build-ubuntu-bionic: 27 | uses: ./.github/workflows/build-glibc-and-release.yaml 28 | with: 29 | dist-base: ubuntu 30 | dist-tag: bionic 31 | 32 | build-ubuntu-focal: 33 | uses: ./.github/workflows/build-glibc-and-release.yaml 34 | with: 35 | dist-base: ubuntu 36 | dist-tag: focal 37 | -------------------------------------------------------------------------------- /.github/workflows/release-all-rhel.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The MathWorks, Inc. 2 | --- 3 | name: "release-all-rhel" 4 | 5 | # Build and release all artifacts is either triggered manually or on the first day of the month to 6 | # ensure all artifacts are up-to-date with security and other patches to these distributions 7 | on: 8 | workflow_dispatch: 9 | # All current RHEL and downstream version include all required patches - no need to build each month 10 | # schedule: 11 | # - cron: '0 2 1 * *' 12 | 13 | jobs: 14 | build-alma-8-4: 15 | uses: mathworks/build-glibc-bz-19329-patch/.github/workflows/build-glibc-and-release.yaml@main 16 | with: 17 | dist-base: almalinux 18 | dist-tag: 8.4 19 | dockerfile: Dockerfile.rhel 20 | 21 | build-alma-8-5: 22 | uses: mathworks/build-glibc-bz-19329-patch/.github/workflows/build-glibc-and-release.yaml@main 23 | with: 24 | dist-base: almalinux 25 | dist-tag: 8.5 26 | dockerfile: Dockerfile.rhel 27 | -------------------------------------------------------------------------------- /.github/workflows/release-all-ubuntu.yaml: 
-------------------------------------------------------------------------------- 1 | # Copyright 2021-2022 The MathWorks, Inc. 2 | --- 3 | name: "release-all-ubuntu" 4 | 5 | # Build and release all artifacts is either triggered manually or on the first day of the month to 6 | # ensure all artifacts are up-to-date with security and other patches to these distributions 7 | on: 8 | workflow_dispatch: 9 | schedule: 10 | - cron: '0 3 1 * *' 11 | 12 | # Wish I could use a matrix strategy here - tried to and discovered that there are limitations 13 | # The strategy property is not supported in any job that calls a reusable workflow. 14 | # from https://docs.github.com/en/actions/using-workflows/reusing-workflows#limitations 15 | jobs: 16 | build-ubuntu-bionic: 17 | uses: ./.github/workflows/build-glibc-and-release.yaml 18 | with: 19 | dist-base: ubuntu 20 | dist-tag: bionic 21 | 22 | build-ubuntu-focal: 23 | uses: ./.github/workflows/build-glibc-and-release.yaml 24 | with: 25 | dist-base: ubuntu 26 | dist-tag: focal -------------------------------------------------------------------------------- /.github/workflows/release-debian-bullseye.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2021-2022 The MathWorks, Inc. 2 | --- 3 | name: "release-debian-bullseye" 4 | 5 | # Build and release all artifacts is either triggered manually or on the first day of the month to 6 | # ensure all artifacts are up-to-date with security and other patches to these distributions 7 | on: 8 | workflow_dispatch: 9 | schedule: 10 | - cron: '0 5 1 * *' 11 | 12 | # Wish I could use a matrix strategy here - tried to and discovered that there are limitations 13 | # The strategy property is not supported in any job that calls a reusable workflow. 
14 | # from https://docs.github.com/en/actions/using-workflows/reusing-workflows#limitations 15 | jobs: 16 | build-debian-bullseye: 17 | uses: ./.github/workflows/build-glibc-and-release.yaml 18 | with: 19 | dist-base: debian 20 | dist-tag: bullseye 21 | -------------------------------------------------------------------------------- /.github/workflows/release-debian-buster.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2021-2022 The MathWorks, Inc. 2 | --- 3 | name: "release-debian-buster" 4 | 5 | # Build and release all artifacts is either triggered manually or on the first day of the month to 6 | # ensure all artifacts are up-to-date with security and other patches to these distributions 7 | on: 8 | workflow_dispatch: 9 | schedule: 10 | - cron: '0 7 1 * *' 11 | 12 | # Wish I could use a matrix strategy here - tried to and discovered that there are limitations 13 | # The strategy property is not supported in any job that calls a reusable workflow. 14 | # from https://docs.github.com/en/actions/using-workflows/reusing-workflows#limitations 15 | jobs: 16 | build-debian-buster: 17 | uses: ./.github/workflows/build-glibc-and-release.yaml 18 | with: 19 | dist-base: debian 20 | dist-tag: buster -------------------------------------------------------------------------------- /.github/workflows/release-specific.yaml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The MathWorks, Inc. 2 | --- 3 | name: "release-specific" 4 | 5 | # To release a specific set of artifacts using this workflow you can use the github CLI to trigger 6 | # a workflow dispatch. 
An example of this for ubuntu:focal would be: 7 | # 8 | # gh workflow run release-specific -f dist-base=ubuntu -f dist-tag=focal 9 | on: 10 | workflow_dispatch: 11 | inputs: 12 | dist-base: 13 | required: true 14 | type: string 15 | dist-tag: 16 | required: true 17 | type: string 18 | 19 | jobs: 20 | build-ubuntu: 21 | if: github.event.inputs.dist-base == 'ubuntu' || github.event.inputs.dist-base == 'debian' 22 | uses: mathworks/build-glibc-bz-19329-patch/.github/workflows/build-glibc-and-release.yaml@main 23 | with: 24 | dist-base: ${{ github.event.inputs.dist-base }} 25 | dist-tag: ${{ github.event.inputs.dist-tag }} 26 | dockerfile: Dockerfile.debian 27 | 28 | build-rhel: 29 | if: github.event.inputs.dist-base == 'almalinux' 30 | uses: mathworks/build-glibc-bz-19329-patch/.github/workflows/build-glibc-and-release.yaml@main 31 | with: 32 | dist-base: ${{ github.event.inputs.dist-base }} 33 | dist-tag: ${{ github.event.inputs.dist-tag }} 34 | dockerfile: Dockerfile.rhel -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The MathWorks, Inc. 2 | 3 | build/ -------------------------------------------------------------------------------- /BZ-17645.md: -------------------------------------------------------------------------------- 1 | # BZ-17645 Patch 2 | ## Summary 3 | This patch resolves a performance issue that affects MATLAB and Simulink shutdown performance. The patch provides a new sorting algorithm for shared objects in the dynamic loader. The original algorithm in glibc versions prior to glibc 2.35 is slow when the DSO set contains circular dependencies. 4 | 5 | If you are running 6 | * **ubuntu-based** systems and can upgrade to **version 22.04 (Jammy Jellyfish)** this is the safest and easiest way to alleviate the issue, since that version contains glibc v2.35 in which the underlying issue is completely fixed. 
7 | * **RHEL8-based** systems (*update 06 June 2023*). It appears that RHEL have patched the `glibc-2.28` packages in release `225` to fix this issue. Ensure that you have installed at least [`glibc-2.28-225.el8`](https://git.almalinux.org/rpms/glibc/commit/b73861e1875801a1540e283fe0bb238ad448f04b). 8 | 9 | ## Bug Description 10 | The performance issue impacts the MATLAB and Simulink shutdown time. In a Debian 11 environment using glibc 2.31, the MATLAB and Simulink shutdown time is about 300 seconds with modern hardware. With the same setup and the patch enabled, the shutdown time is less than 3 seconds. The performance issue was first reported in November of 2014 by Paulo Andrade. For more information, see [RFE: Improve performance of dynamic loader for deeply nested DSO dependencies](https://sourceware.org/bugzilla/show_bug.cgi?id=17645). 11 | 12 | ## Patch Sources 13 | This patch contains a new implementation of _dl_sort_maps, which Paulo Andrade introduced in [RFE: Improve performance of dynamic loader for deeply nested DSO dependencies](https://sourceware.org/bugzilla/show_bug.cgi?id=17645). Chung-Lin Tang and Adhemerval Zanella later incorporated the new implementation into the master branch of glibc 2.35 in the commit [elf: Fix slow DSO sorting behavior in dynamic loader (BZ #17645)](https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=15a0c5730d1d5aeb95f50c9ec7470640084feae8). 14 | 15 | The MathWorks BZ-17645 patch sets the new DFS sorting algorithm as the default behavior. 16 | 17 | ## Acknowledgements and Thanks 18 | Many thanks to the glibc team and, particularly, Paulo Andrade for reporting the issue and providing the original implementation and Chung-Lin Tang and Adhemerval Zanella for incorporating the new sorting algorithm into glibc 2.35 and providing the original patch. 
19 | -------------------------------------------------------------------------------- /BZ-19329.md: -------------------------------------------------------------------------------- 1 | # BZ-19329 Patch 2 | ## Summary 3 | This repository provides a method for working around the sporadic issue seen on older linux distributions: MathWorks® products can trigger an [assert failure at concurrent pthread_create and dlopen (BZ-19329)](https://sourceware.org/bugzilla/show_bug.cgi?id=19329) in the [GNU C Libraries (glibc)](https://www.gnu.org/software/libc/). 4 | 5 | If you are running 6 | * **ubuntu-based** systems and can upgrade to **version 22.04 (Jammy Jellyfish)** this is the safest and easiest way to alleviate the issue, since that version contains glibc v2.35 in which the underlying issue is completely fixed. 7 | * **RHEL-based 8.4 or 8.5** systems (*update 27 June 2022*). It appears that RHEL have patched the `glibc-2.28` packages in release `189` to fix this issue. Ensure that you have installed at least [`glibc-2.28-189.1.el8`](https://git.almalinux.org/rpms/glibc/commit/385bc0f199bf51199143fe12b857f4983db76e48). 8 | 9 | If instead you want to work around this issue, you can use this repository. It provides a build procedure (in an isolated Docker® container) to produce patched versions of the glibc libraries for recent Almalinux, Ubuntu® and Debian® releases. These patched versions [incorporate an initial fix](https://patchwork.ozlabs.org/project/glibc/patch/568D5E11.3010301@arm.com/) proposed on the [libc-alpha mailing list](https://sourceware.org/mailman/listinfo/libc-alpha) that mitigate the issue. In the release area of this repository you can find the debian package build artefacts produced by running the build on Ubuntu 18.04 & 20.04 as well as Debian 9, 10 & 11. You can install these artefacts on an appropriate debian-based machine, virtual machine or docker container, using `dpkg -i`. 
For Almalinux you can find the appropriate `rpm's` which should also work on UBI and CentOS containers. 10 | 11 | ## Bug Description 12 | The [assert failure at concurrent pthread_create and dlopen](https://sourceware.org/bugzilla/show_bug.cgi?id=19329) glibc bug was first reported in December 2015 and can affect any process on Linux that creates a thread at the same time as opening a dynamic shared object library. Initially the issue was only observable with reasonable frequency on very large scale systems such as high performance computing clusters or cloud scale deployment platforms and so did not receive significant attention. However, early on there were [proposed patches](https://sourceware.org/bugzilla/show_bug.cgi?id=19329) to the library. Large scale systems applied those patches in-house and saw significant benefit. More recently a [proposed complete fix for this](https://sourceware.org/pipermail/libc-alpha/2021-February/122626.html) and a set of related issues has been reviewed by the glibc team and accepted into version 2.34 of glibc (released in August 2021). The 2.34 version of glibc is available in [RHEL 9 beta](https://developers.redhat.com/articles/2021/11/03/red-hat-enterprise-linux-9-beta-here) and [Ubuntu 21.10 (Impish Indri)](https://launchpad.net/ubuntu/+source/glibc). However, there are no plans to backport the fix into previous glibc versions and it is expected that previous versions will be in production use for a significant number of years (e.g. the current end-of-life date for Ubuntu:20.04 is April 2030). 13 | 14 | More recently MathWorks products have made extensive use of a C++ micro-services architecture. This architecture leads to a more dynamic system in which library modules are loaded at the point of use. As a result, the MATLAB® process is more likely to load a library at the same time as creating a thread, and so is more likely to encounter this glibc bug. 
When this [issue is encountered](https://www.mathworks.com/matlabcentral/answers/1454674-why-does-matlab-crash-on-linux-with-inconsistency-detected-by-ld-so-elf-dl-tls-c-597-_dl_allo) the console that opened MATLAB shows a message similar to the following: 15 | 16 | ``` 17 | Inconsistency detected by ld.so: ../elf/dl-tls.c: 597: _dl_allocate_tls_init: Assertion 'listp != NULL' failed! 18 | ``` 19 | or 20 | ``` 21 | Inconsistency detected by ld.so: dl-tls.c: 493: _dl_allocate_tls_init: Assertion `listp->slotinfo[cnt].gen <= GL(dl_tls_generation)' failed! 22 | ``` 23 | There might also be a stack trace file called `matlab_crash_dump.${PID}` in the user's home folder or the current working folder. This usually indicates that a segmentation violation has been detected and the stack trace starts with something similar to the following: 24 | 25 | ``` 26 | Stack Trace (from fault): 27 | [ 0] 0x00002b661142d5a0 /lib64/ld-linux-x86-64.so.2+00075168 _dl_allocate_tls_init+00000080 28 | [ 1] 0x00002b66120c187c /usr/lib64/libpthread.so.0+00034940 pthread_create+00001884 29 | ``` 30 | 31 | If you see these or similar signatures at a sufficient frequency on a system, you might want to consider patching glibc on that system, machine or container. 32 | 33 | ### RHEL 8.4 & 8.5 Update (*27 June 2022*) 34 | 35 | RHEL have just integrated the BZ-19329 patch into [`glibc-2.28-189.1.el8`](https://git.almalinux.org/rpms/glibc/commit/385bc0f199bf51199143fe12b857f4983db76e48). It appears that the change actually went into build [`2.28-175`](https://git.almalinux.org/rpms/glibc/src/commit/385bc0f199bf51199143fe12b857f4983db76e48/SPECS/glibc.spec#L2721) and got released with `2.28-189`. 
36 | 37 | Unless you need to use a `pre-189` release of the package you should no longer need to use this repository to patch RHEL and AlmaLinux for BZ-19329 38 | 39 | ## Patch sources 40 | These patches all derive from an [original patch](https://sourceware.org/legacy-ml/libc-alpha/2016-01/msg00480.html) put together by Szabolcs Nagy in January 2016. The 2.24 to 2.28 patches in this repo are derived from this original e-mail and can be downloaded directly from the archive of the `libc-alpha@sourceware.org` mailing list where they were proposed: 41 | 42 | * https://sourceware.org/legacy-ml/libc-alpha/2016-11/msg01092.html 43 | * https://sourceware.org/legacy-ml/libc-alpha/2016-11/msg01093.html 44 | 45 | These 2 patches are directly linked in [the original bug report](https://sourceware.org/bugzilla/show_bug.cgi?id=19329) in comment 7 by Pádraig Brady. In addition, the bug report also has a reference to the original Szabolcs Nagy patch in comment 4 (dated January 2016). The 2 messages above refer back to that original patch via a [message describing the overall problem in more detail](https://sourceware.org/legacy-ml/libc-alpha/2016-11/msg01026.html). 46 | 47 | In addition, in [Sept 2017 Pádraig Brady](https://sourceware.org/bugzilla/show_bug.cgi?id=19329) pointed out that there was an off-by-one error in the original patch that needs to be included 48 | ``` diff 49 | diff --git a/elf/dl-tls.c b/elf/dl-tls.c 50 | index 073321c..2c9ad2a 100644 51 | --- a/elf/dl-tls.c 52 | +++ b/elf/dl-tls.c 53 | @@ -571,7 +571,7 @@ _dl_allocate_tls_init (void *result) 54 | } 55 | 56 | total += cnt; 57 | - if (total >= dtv_slots) 58 | + if (total > dtv_slots) 59 | break; 60 | 61 | /* Synchronize with dl_add_to_slotinfo. */ 62 | ``` 63 | This is the source for the final `unsubmitted-bz19329-fixup.v2.27.patch` 64 | 65 | In glibc v2.31 the original source code changed significantly and the patches needed to be slightly adapted so as to match the new codebase. 
These adapted patches are included here in the `patches/2.31` folder and soft-linked from 2.32 and 2.33. 66 | 67 | ## Acknowledgement and thanks 68 | Many thanks to the broader glibc team and particularly Szabolcs Nagy for providing the original patches and for fixing these issues in glibc v2.34. -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | Dockerfile.debian -------------------------------------------------------------------------------- /Dockerfile.al2: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The MathWorks, Inc. 2 | ARG BUILD_ROOT=/root/ 3 | ARG RPM_DIR=${BUILD_ROOT}/rpmbuild/RPMS/x86_64/ 4 | 5 | ARG ARCH= 6 | ARG DIST_BASE=amazonlinux 7 | ARG DIST_TAG=2 8 | FROM ${DIST_BASE}:${DIST_TAG} AS build-stage 9 | 10 | RUN yum install -y rpm-build make yum-utils 11 | 12 | ARG BUILD_ROOT 13 | WORKDIR ${BUILD_ROOT} 14 | 15 | RUN yumdownloader --source glibc && \ 16 | yum-builddep -y glibc-*.src.rpm && \ 17 | rpm -ivh glibc-*.src.rpm 18 | 19 | ARG GLIBC_VERSION=2.26 20 | COPY scripts/update-specfile-al2.sh ${BUILD_ROOT} 21 | COPY patches/al2/${GLIBC_VERSION} ${BUILD_ROOT}/patches 22 | 23 | RUN cp patches/* rpmbuild/SOURCES && \ 24 | ./update-specfile-al2.sh 25 | 26 | RUN rpmbuild --nocheck -bb rpmbuild/SPECS/glibc.spec 27 | 28 | ARG RPM_DIR 29 | WORKDIR ${RPM_DIR} 30 | 31 | RUN tar -czf all-packages.tar.gz *.rpm 32 | 33 | FROM scratch AS release-stage 34 | ARG RPM_DIR 35 | COPY --from=build-stage ${RPM_DIR}/*.rpm /build/ 36 | COPY --from=build-stage ${RPM_DIR}/all-packages.tar.gz /build/ -------------------------------------------------------------------------------- /Dockerfile.centos: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The MathWorks, Inc. 
2 | # This Dockerfile is still a work in progress and may not work 3 | ARG BUILD_ROOT=/root/ 4 | ARG RPM_DIR=${BUILD_ROOT}/rpmbuild/RPMS/x86_64/ 5 | 6 | ARG ARCH= 7 | ARG DIST_BASE=centos 8 | ARG DIST_TAG=centos7.9.2009 9 | FROM ${DIST_BASE}:${DIST_TAG} AS build-stage 10 | 11 | RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/CentOS-*.repo && \ 12 | sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/CentOS-*.repo && \ 13 | sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/CentOS-*.repo 14 | 15 | RUN yum install -y epel-release && \ 16 | yum install -y dnf 17 | 18 | RUN dnf install -y --nodocs dnf-plugins-core && \ 19 | dnf install -y rpm-build make 20 | 21 | ARG BUILD_ROOT 22 | WORKDIR ${BUILD_ROOT} 23 | 24 | RUN dnf download --source glibc && \ 25 | dnf builddep -y --nodocs glibc-*.src.rpm && \ 26 | rpm -ivh glibc-*.src.rpm 27 | 28 | ARG GLIBC_VERSION=2.17 29 | COPY scripts/update-specfile.sh ${BUILD_ROOT} 30 | COPY patches/rhel/${GLIBC_VERSION} ${BUILD_ROOT}/patches 31 | 32 | RUN cp patches/* rpmbuild/SOURCES && \ 33 | ./update-specfile.sh 34 | 35 | RUN rpmbuild --nocheck -bb rpmbuild/SPECS/glibc.spec 36 | 37 | ARG RPM_DIR 38 | WORKDIR ${RPM_DIR} 39 | 40 | RUN tar -czf all-packages.tar.gz *.rpm 41 | 42 | FROM scratch AS release-stage 43 | ARG RPM_DIR 44 | COPY --from=build-stage ${RPM_DIR}/*.rpm /build/ 45 | COPY --from=build-stage ${RPM_DIR}/all-packages.tar.gz /build/ -------------------------------------------------------------------------------- /Dockerfile.debian: -------------------------------------------------------------------------------- 1 | # Copyright 2021 - 2022 The MathWorks, Inc. 
2 | ARG BUILD_ROOT=/opt/glibc/src/glibc/ 3 | 4 | # Default to building for glibc 2.31 in ubuntu:20.04 but by specifying 5 | # --build-arg RELEASE=18.04 in the docker build phase this will build for 6 | # glibc 2.27 7 | ARG ARCH= 8 | ARG DIST_BASE=ubuntu 9 | ARG DIST_TAG=20.04 10 | FROM ${ARCH}${DIST_BASE}:${DIST_TAG} AS build-stage 11 | 12 | ARG DIST_BASE 13 | ARG DIST_TAG 14 | ARG OVERRIDE_DIST_RELEASE=false 15 | 16 | ENV DEBIAN_FRONTEND="noninteractive" \ 17 | TZ="Etc/UTC" 18 | 19 | RUN apt-get update && apt-get -y upgrade && \ 20 | apt-get install --no-install-recommends -y \ 21 | quilt \ 22 | nano \ 23 | devscripts 24 | 25 | ARG PKG_EXT 26 | ARG BUILD_ROOT 27 | WORKDIR ${BUILD_ROOT} 28 | 29 | # Build glibc in 3 distinct stages 30 | # 1. Get the build envionment and source code 31 | # 2. Patch the source code 32 | # 3. Build the source code 33 | 34 | COPY scripts/setup-glibc-build-env-vars.sh ${BUILD_ROOT}/ 35 | COPY scripts/get-glibc-src.sh ${BUILD_ROOT}/ 36 | RUN ./get-glibc-src.sh 37 | 38 | COPY patches/debian/ ${BUILD_ROOT}/patches/ 39 | COPY scripts/patch-glibc-src.sh ${BUILD_ROOT}/ 40 | RUN ./patch-glibc-src.sh 41 | 42 | COPY scripts/build-glibc-src.sh ${BUILD_ROOT}/ 43 | RUN ./build-glibc-src.sh 44 | 45 | RUN tar -czf all-packages.tar.gz *.deb 46 | 47 | FROM scratch AS release-stage 48 | ARG BUILD_ROOT 49 | COPY --from=build-stage ${BUILD_ROOT}/*.deb /build/ 50 | COPY --from=build-stage ${BUILD_ROOT}/all-packages.tar.gz /build/ 51 | -------------------------------------------------------------------------------- /Dockerfile.rhel: -------------------------------------------------------------------------------- 1 | # Copyright 2021 - 2022 The MathWorks, Inc. 
2 | ARG BUILD_ROOT=/root/ 3 | ARG RPM_DIR=${BUILD_ROOT}/rpmbuild/RPMS/x86_64/ 4 | 5 | 6 | ARG ARCH= 7 | ARG DIST_BASE=almalinux 8 | ARG DIST_TAG=8.5 9 | FROM ${DIST_BASE}:${DIST_TAG} AS build-stage 10 | 11 | RUN dnf install -y --nodocs dnf-plugins-core && \ 12 | dnf config-manager --enable powertools && \ 13 | dnf install -y rpm-build 14 | 15 | ARG BUILD_ROOT 16 | WORKDIR ${BUILD_ROOT} 17 | 18 | RUN dnf download --source glibc && \ 19 | dnf builddep -y --nodocs glibc-*.src.rpm && \ 20 | rpm -ivh glibc-*.src.rpm 21 | 22 | ARG GLIBC_VERSION=2.28-189 23 | COPY scripts/update-specfile.sh ${BUILD_ROOT} 24 | COPY patches/rhel/${GLIBC_VERSION} ${BUILD_ROOT}/patches 25 | 26 | RUN cp patches/* rpmbuild/SOURCES && \ 27 | ./update-specfile.sh 28 | 29 | RUN rpmbuild --nocheck -bb rpmbuild/SPECS/glibc.spec 30 | 31 | ARG RPM_DIR 32 | WORKDIR ${RPM_DIR} 33 | 34 | RUN tar -czf all-packages.tar.gz *.rpm 35 | 36 | FROM scratch AS release-stage 37 | ARG RPM_DIR 38 | COPY --from=build-stage ${RPM_DIR}/*.rpm /build/ 39 | COPY --from=build-stage ${RPM_DIR}/all-packages.tar.gz /build/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Build `glibc` Patches 2 | ## Summary 3 | This repository provides a method for working around various issues seen in older linux distributions' glibc libraries. The glibc libraries are so core to the behaviour of a system that they rarely get updated in older distributions, so we provide ways to patch those libraries. 4 | 5 | ## Issues Currently Patched 6 | * [BZ-19329](BZ-19329.md) ([bugzilla report](https://sourceware.org/bugzilla/show_bug.cgi?id=19329)) is a significant sporadic issue in all glibc versions up to 2.34. 7 | * [BZ-17645](BZ-17645.md) ([bugzilla report](https://sourceware.org/bugzilla/show_bug.cgi?id=17645)) is a significant performance issue on all glibc versions up to 2.35. 
8 | 9 | 10 | ### **Caution** 11 | Note that **all** processes on your machine share glibc libraries so these patches will apply to the system as a whole and not just to MathWorks products. Most applications and programs on your computer are likely to use glibc. Care should be taken to ensure you apply the correct version of the patch to your system based on the current version of glibc. Applying the wrong version could make your whole system unusable. Try installing the patch inside a disposable docker container first to test your install procedure – you can find instructions below. 12 | 13 | You can find the major version of glibc you are running using, for example: 14 | 15 | ``` 16 | $ ldd --version ldd 17 | 18 | ldd (Ubuntu GLIBC 2.31-0ubuntu9.2) 2.31 19 | Copyright (C) 2020 Free Software Foundation, Inc. 20 | This is free software; see the source for copying conditions. There is NO 21 | warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 22 | Written by Roland McGrath and Ulrich Drepper. 23 | ``` 24 | 25 | More specificity on version can be found using: 26 | ``` 27 | $ dpkg-query --show libc6:amd64 28 | libc6:amd64 2.31-0ubuntu9.2 29 | ``` 30 | 31 | ## Build procedure 32 | To build a specific version of glibc on your own machine you will need a version of `docker` that supports `BUILDKIT` (this feature was added in version 18.09). This repository holds patches for all glibc versions on debian derived systems from 2.24 to 2.33 inclusive, as well as a version for RHEL 8 with glibc 2.28. Running the build process takes between 10 and 60 mins based on the compute ability of your system. 33 | 34 | ### Pre-built artefacts 35 | This repository runs a number of github actions to build artefacts for specific Debian, Ubuntu and RHEL versions and it is likely that these are all that is needed to patch your system. You can download the matching packages for your system from the release area. 36 | 37 | ### Building 38 | 1. 
Clone this repository locally and change folder into the repository 39 | ``` 40 | git clone https://github.com/mathworks/build-glibc-bz-19329-patch.git 41 | cd build-glibc-bz-19329-patch 42 | ``` 43 | 2. Build (using `docker build`) for the distribution and specific release you want to patch. Select the distribution and specific distribution version by setting the build argument `DIST_BASE` and `DIST_TAG`. `DIST_BASE:DIST_TAG` must be one of 44 | 45 | | | | 46 | | - | - | 47 | | `debian:9` | `debian:stretch` | 48 | | `debian:10`| `debian:buster` | 49 | | `debian:11`| `debian:bullseye` | 50 | | `ubuntu:18.04` | `ubuntu:bionic` | 51 | | `ubuntu:20.04` | `ubuntu:focal` | 52 | | `ubuntu:21.04` | `ubuntu:hirsute` | 53 | | `almalinux:8.4` | | 54 | | `almalinux:8.5` | | 55 | 56 | *Note*: You should only patch RHEL 8.4 or 8.5 if you cannot get `glibc-2.28-189.1.el8` onto the machine via the normal upgrade procedures. 57 | 58 | Here is an example build command (for `debian:9`): 59 | ``` 60 | DOCKER_BUILDKIT=1 docker build --build-arg DIST_BASE=debian --build-arg DIST_TAG=9 --output type=local,dest=. . 61 | ``` 62 | 63 | The build command will use a local container image of the specific distribution requested, or pull one if none exists. To ensure you are building the most up-to-date versions of the libraries you should `docker pull` the specific `DIST_BASE:DIST_TAG` distribution before building. The build progresses and finally will copy the new debian package to a local folder called `./build/`. In that folder will be a libc6 debian package that can be installed on the appropriate distribution. 
For example having built for `debian:9`, `debian:10`, `debian:11`, and `ubuntu:20.04` the folder contains: 64 | 65 | ``` 66 | $ ls -x build/ 67 | libc6_2.24-11+deb9u4.custom_amd64.deb 68 | libc6_2.27-3ubuntu1.4.custom_amd64.deb 69 | libc6_2.28-10.custom_amd64.deb 70 | libc6_2.31-13+deb11u2.custom_amd64.deb 71 | ``` 72 | 73 | When building for Almalinux you must use the `Dockerfile.rhel` rather than the debian `Dockerfile` so the build command is 74 | ``` 75 | DOCKER_BUILDKIT=1 docker build --build-arg DIST_TAG=8.5 -f Dockerfile.rhel --output type=local,dest=. . 76 | ``` 77 | 78 | If you have access to a RHEL subscription you should be able to adapt the `Dockerfile.rhel` trivially to include the correct repos to support building the sources directly in a `ubi8` container. 79 | 80 | ### Overriding package version string 81 | The package version extension defaults to `.DIST_BASE.DIST_TAG.custom`, where 82 | `${DIST_TAG}` defaults to the `VERSION_CODENAME` found in `/etc/os-release`. This version 83 | can be overridden by setting the build argument `PKG_EXT`. 84 | 85 | E.g., on Debian 11, the default packages will be named like 86 | `libc6_2.31-13+deb11u2.debian.bullseye.custom_amd64.deb`. If built with 87 | `--build-arg PKG_EXT=.test`, the package would instead be named 88 | `libc6_2.31-13+deb11u2.test_amd64.deb`. 89 | 90 | ## Installing the built packages 91 | *Please note the caution above - take care not to install the wrong package version compared to the rest of your system. 
Consider trying the install in a disposable docker container first.* 92 | 93 | Installing a specific debian package on a system is as simple as executing 94 | ``` 95 | dpkg -i libc6_2.24-11+deb9u4.custom_amd64.deb 96 | ``` 97 | For your system replace the debian package with the correct version that matches the glibc you already have (see for example the output from `dpkg-query --show libc6:amd64`) 98 | 99 | Installing the rpms on a UBI / Almalinux system requires you to install several of the packages at once, for example 100 | ``` 101 | dnf install -y glibc-2.28-164.custom.el8.x86_64.rpm \ 102 | glibc-common-2.28-164.custom.el8.x86_64.rpm \ 103 | glibc-minimal-langpack-2.28-164.custom.el8.x86_64.rpm 104 | ``` 105 | 106 | ### Installing in a Docker container 107 | When building a docker container with a specific patch, assuming the patch is in the top level docker context folder you would have a `Dockerfile` like 108 | ``` docker 109 | FROM debian:9 110 | 111 | COPY libc6_2.24-11+deb9u4.custom_amd64.deb /tmp/ 112 | RUN dpkg -i /tmp/libc6_2.24-11+deb9u4.custom_amd64.deb 113 | ``` 114 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Reporting Security Vulnerabilities 2 | 3 | If you believe you have discovered a security vulnerability, please report it to 4 | [security@mathworks.com](mailto:security@mathworks.com). Please see 5 | [MathWorks Vulnerability Disclosure Policy for Security Researchers](https://www.mathworks.com/company/aboutus/policies_statements/vulnerability-disclosure-policy.html) 6 | for additional information. 
-------------------------------------------------------------------------------- /patches/al2/2.26/mw-0001-remove-broken-code-path-for-easier-code-review.patch: -------------------------------------------------------------------------------- 1 | From ef6eb8f97d4ebfbb471bdd8d37455f2dadf507a3 Mon Sep 17 00:00:00 2001 2 | From: Szabolcs Nagy 3 | Date: Wed, 30 Nov 2016 11:44:25 +0000 4 | Subject: [PATCH 1/5] remove broken code path for easier code review 5 | 6 | This patch is not necessary for the bug fix, just makes concurrency 7 | code review easier (removes a data race and overflow from a broken 8 | code path). 9 | 10 | dlopen can oom crash anyway in _dl_resize_dtv and it's probably 11 | better to crash than leave half setup modules around. 12 | 13 | 2016-11-30 Szabolcs Nagy 14 | 15 | * elf/dl-tls.c (_dl_add_to_slotinfo): OOM crash. 16 | --- 17 | elf/dl-tls.c | 16 ++++------------ 18 | 1 file changed, 4 insertions(+), 12 deletions(-) 19 | 20 | diff --git a/elf/dl-tls.c b/elf/dl-tls.c 21 | index 5aba33b3fa..4daf88af6e 100644 22 | --- a/elf/dl-tls.c 23 | +++ b/elf/dl-tls.c 24 | @@ -927,18 +927,10 @@ _dl_add_to_slotinfo (struct link_map *l) 25 | + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); 26 | if (listp == NULL) 27 | { 28 | - /* We ran out of memory. We will simply fail this 29 | - call but don't undo anything we did so far. The 30 | - application will crash or be terminated anyway very 31 | - soon. */ 32 | - 33 | - /* We have to do this since some entries in the dtv 34 | - slotinfo array might already point to this 35 | - generation. */ 36 | - ++GL(dl_tls_generation); 37 | - 38 | - _dl_signal_error (ENOMEM, "dlopen", NULL, N_("\ 39 | -cannot create TLS data structures")); 40 | + /* We ran out of memory in dlopen while updating tls structures. 41 | + TODO: side-effects should be rolled back and the failure should 42 | + be reported to the caller, but that's hard. 
*/ 43 | + oom (); 44 | } 45 | 46 | listp->len = TLS_SLOTINFO_SURPLUS; 47 | -- 48 | 2.39.5 49 | 50 | -------------------------------------------------------------------------------- /patches/al2/2.26/mw-0002-Fix-data-races-between-pthread_create-and-dlopen.patch: -------------------------------------------------------------------------------- 1 | From b65f2c16f389d82b54404df97eed838630b1ee90 Mon Sep 17 00:00:00 2001 2 | From: Szabolcs Nagy 3 | Date: Wed, 30 Nov 2016 11:44:32 +0000 4 | Subject: [PATCH 2/5] Fix data races between pthread_create and dlopen 5 | 6 | This fixes a subset of the issues described in 7 | https://sourceware.org/ml/libc-alpha/2016-11/msg01026.html 8 | without adding locks to pthread_create. 9 | 10 | Only races between dlopen and pthread_create were considered, 11 | and the asserts got removed that tried to check for concurrency 12 | issues. 13 | 14 | The patch is incomplete because dlclose, tls access and 15 | dl_iterate_phdr related code paths are not modified. 16 | 17 | dlclose should be updated in a similar fashion to dlopen 18 | to make the patch complete alternatively pthread_create 19 | may take the GL(dl_load_write_lock) to sync with dlclose 20 | or the GL(dl_load_lock) to sync with dlopen and dlclose 21 | (that would simplify the concurrency design, but increase 22 | lock contention on the locks). 23 | 24 | 2016-11-30 Szabolcs Nagy 25 | 26 | [BZ #19329] 27 | * elf/dl-open.c (dl_open_worker): Write GL(dl_tls_generation) 28 | atomically. 29 | * elf/dl-tls.c (_dl_allocate_tls_init): Read GL(dl_tls_generation), 30 | GL(dl_tls_max_dtv_idx), slotinfo entries and listp->next atomically. 31 | Remove assertions that cannot be guaranteed. 32 | (_dl_add_to_slotinfo): Write the slotinfo entries and listp->next 33 | atomically. 
34 | --- 35 | elf/dl-open.c | 12 +++++-- 36 | elf/dl-tls.c | 87 ++++++++++++++++++++++++++++++++++++++++++++------- 37 | 2 files changed, 86 insertions(+), 13 deletions(-) 38 | 39 | diff --git a/elf/dl-open.c b/elf/dl-open.c 40 | index cec54db413..a45319e5fc 100644 41 | --- a/elf/dl-open.c 42 | +++ b/elf/dl-open.c 43 | @@ -524,9 +524,17 @@ dl_open_worker (void *a) 44 | } 45 | 46 | /* Bump the generation number if necessary. */ 47 | - if (any_tls && __builtin_expect (++GL(dl_tls_generation) == 0, 0)) 48 | - _dl_fatal_printf (N_("\ 49 | + if (any_tls) 50 | + { 51 | + /* This cannot be in a data-race so non-atomic load is valid too. */ 52 | + size_t newgen = atomic_load_relaxed (&GL(dl_tls_generation)) + 1; 53 | + /* Synchronize with _dl_allocate_tls_init (see notes there) and 54 | + avoid storing an overflowed counter. */ 55 | + if (__builtin_expect (newgen == 0, 0)) 56 | + _dl_fatal_printf (N_("\ 57 | TLS generation counter wrapped! Please report this.")); 58 | + atomic_store_release (&GL(dl_tls_generation), newgen); 59 | + } 60 | 61 | /* We need a second pass for static tls data, because _dl_update_slotinfo 62 | must not be run while calls to _dl_add_to_slotinfo are still pending. */ 63 | diff --git a/elf/dl-tls.c b/elf/dl-tls.c 64 | index 4daf88af6e..c60bbd72ea 100644 65 | --- a/elf/dl-tls.c 66 | +++ b/elf/dl-tls.c 67 | @@ -438,6 +438,36 @@ _dl_resize_dtv (dtv_t *dtv) 68 | } 69 | 70 | 71 | +/* 72 | +CONCURRENCY NOTES 73 | + 74 | +dlopen (and dlclose) holds the GL(dl_load_lock) while writing shared state, 75 | +which may be concurrently read by pthread_create and tls access without taking 76 | +the lock, so atomic access should be used. The shared state: 77 | + 78 | + GL(dl_tls_max_dtv_idx) - max modid assigned, (modid can be reused). 79 | + GL(dl_tls_generation) - generation count, incremented by dlopen and dlclose. 80 | + GL(dl_tls_dtv_slotinfo_list) - list of entries, contains generation count 81 | + and link_map for each module with a modid. 
82 | + 83 | +A module gets a modid assigned if it has tls, a modid identifies a slotinfo 84 | +entry and it is the index of the corresponding dtv slot. The generation count 85 | +is assigned to slotinfo entries of a newly loaded or unloaded module and its 86 | +newly loaded or unloaded dependencies. 87 | + 88 | +TODO: dlclose may free memory read by a concurrent pthread_create or tls 89 | +access. This is broken now, so it is assumed that dlclose does not free 90 | +link_map structures while pthread_create or __tls_get_addr is reading them. 91 | + 92 | +pthread_create calls _dl_allocate_tls_init (before creating the new thread), 93 | +which should guarantee that the dtv is in a consistent state at the end: 94 | + 95 | +All slotinfo updates with generation <= dtv[0].counter are reflected in the 96 | +dtv and arbitrary later module unloads may also be reflected as unallocated 97 | +entries. (Note: a modid reuse implies a module unload and accessing tls in 98 | +an unloaded module is undefined.) 99 | +*/ 100 | + 101 | void * 102 | internal_function 103 | _dl_allocate_tls_init (void *result) 104 | @@ -450,12 +480,24 @@ _dl_allocate_tls_init (void *result) 105 | struct dtv_slotinfo_list *listp; 106 | size_t total = 0; 107 | size_t maxgen = 0; 108 | + /* Synchronizes with the increments in dl_{open,close}_worker. 109 | + Slotinfo updates of this generation are sequenced before the 110 | + write we read from here. */ 111 | + size_t gen_count = atomic_load_acquire (&GL(dl_tls_generation)); 112 | + /* Either reads from the last write that is sequenced before the 113 | + generation counter increment we synchronized with or a write 114 | + made by a later dlopen/dlclose. dlclose may decrement this, 115 | + but only if related modules are unloaded. So it is an upper 116 | + bound on non-unloaded modids up to gen_count generation. */ 117 | + size_t dtv_slots = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx)); 118 | 119 | /* Check if the current dtv is big enough. 
*/ 120 | - if (dtv[-1].counter < GL(dl_tls_max_dtv_idx)) 121 | + if (dtv[-1].counter < dtv_slots) 122 | { 123 | /* Resize the dtv. */ 124 | dtv = _dl_resize_dtv (dtv); 125 | + /* _dl_resize_dtv rereads GL(dl_tls_max_dtv_idx) which may decrease. */ 126 | + dtv_slots = dtv[-1].counter; 127 | 128 | /* Install this new dtv in the thread data structures. */ 129 | INSTALL_DTV (result, &dtv[-1]); 130 | @@ -472,22 +514,33 @@ _dl_allocate_tls_init (void *result) 131 | for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt) 132 | { 133 | struct link_map *map; 134 | + size_t gen; 135 | void *dest; 136 | 137 | /* Check for the total number of used slots. */ 138 | - if (total + cnt > GL(dl_tls_max_dtv_idx)) 139 | + if (total + cnt > dtv_slots) 140 | break; 141 | 142 | - map = listp->slotinfo[cnt].map; 143 | + /* Synchronize with dl_add_to_slotinfo and remove_slotinfo. */ 144 | + map = atomic_load_acquire (&listp->slotinfo[cnt].map); 145 | if (map == NULL) 146 | /* Unused entry. */ 147 | continue; 148 | 149 | + /* Consistent generation count with the map read above. 150 | + Inconsistent gen may be read if the entry is being reused, 151 | + in which case it is larger than gen_count and we skip it. */ 152 | + gen = atomic_load_relaxed (&listp->slotinfo[cnt].gen); 153 | + if (gen > gen_count) 154 | + /* New entry. */ 155 | + continue; 156 | + 157 | /* Keep track of the maximum generation number. This might 158 | not be the generation counter. */ 159 | - assert (listp->slotinfo[cnt].gen <= GL(dl_tls_generation)); 160 | - maxgen = MAX (maxgen, listp->slotinfo[cnt].gen); 161 | + maxgen = MAX (maxgen, gen); 162 | 163 | + /* TODO: concurrent dlclose may free map which would break 164 | + the rest of the code below. 
*/ 165 | dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED; 166 | dtv[map->l_tls_modid].pointer.to_free = NULL; 167 | 168 | @@ -517,11 +570,15 @@ _dl_allocate_tls_init (void *result) 169 | } 170 | 171 | total += cnt; 172 | - if (total >= GL(dl_tls_max_dtv_idx)) 173 | + if (total >= dtv_slots) 174 | break; 175 | 176 | - listp = listp->next; 177 | - assert (listp != NULL); 178 | + /* Synchronize with dl_add_to_slotinfo. */ 179 | + listp = atomic_load_acquire (&listp->next); 180 | + /* dtv_slots is an upper bound on the number of entries we care 181 | + about, the list may end sooner. */ 182 | + if (listp == NULL) 183 | + break; 184 | } 185 | 186 | /* The DTV version is up-to-date now. */ 187 | @@ -922,7 +979,7 @@ _dl_add_to_slotinfo (struct link_map *l) 188 | the first slot. */ 189 | assert (idx == 0); 190 | 191 | - listp = prevp->next = (struct dtv_slotinfo_list *) 192 | + listp = (struct dtv_slotinfo_list *) 193 | malloc (sizeof (struct dtv_slotinfo_list) 194 | + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); 195 | if (listp == NULL) 196 | @@ -937,9 +994,17 @@ _dl_add_to_slotinfo (struct link_map *l) 197 | listp->next = NULL; 198 | memset (listp->slotinfo, '\0', 199 | TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); 200 | + /* Add the new list item and synchronize with _dl_allocate_tls_init. */ 201 | + atomic_store_release (&prevp->next, listp); 202 | } 203 | 204 | /* Add the information into the slotinfo data structure. */ 205 | - listp->slotinfo[idx].map = l; 206 | - listp->slotinfo[idx].gen = GL(dl_tls_generation) + 1; 207 | + 208 | + /* This cannot be in a data-race so non-atomic load would be valid too. */ 209 | + size_t newgen = atomic_load_relaxed (&GL(dl_tls_generation)) + 1; 210 | + /* TODO: Concurrent readers may see an overflowed gen, which is bad, 211 | + but overflow is guaranteed to crash the dlopen that is executing. 
*/ 212 | + atomic_store_relaxed (&listp->slotinfo[idx].gen, newgen); 213 | + /* Synchronize with _dl_allocate_tls_init (see notes there). */ 214 | + atomic_store_release (&listp->slotinfo[idx].map, l); 215 | } 216 | -- 217 | 2.39.5 218 | 219 | -------------------------------------------------------------------------------- /patches/al2/2.26/mw-0003-BZ19329-fixup.patch: -------------------------------------------------------------------------------- 1 | From 82a104f513447ebcf0e43ad6f0a49fdfeff2a48b Mon Sep 17 00:00:00 2001 2 | From: Mike Gulick 3 | Date: Wed, 15 Jan 2025 19:10:55 -0500 4 | Subject: [PATCH 3/5] BZ19329 fixup 5 | 6 | --- 7 | elf/dl-tls.c | 2 +- 8 | 1 file changed, 1 insertion(+), 1 deletion(-) 9 | 10 | diff --git a/elf/dl-tls.c b/elf/dl-tls.c 11 | index c60bbd72ea..305eae3d6d 100644 12 | --- a/elf/dl-tls.c 13 | +++ b/elf/dl-tls.c 14 | @@ -570,7 +570,7 @@ _dl_allocate_tls_init (void *result) 15 | } 16 | 17 | total += cnt; 18 | - if (total >= dtv_slots) 19 | + if (total > dtv_slots) 20 | break; 21 | 22 | /* Synchronize with dl_add_to_slotinfo. */ 23 | -- 24 | 2.39.5 25 | 26 | -------------------------------------------------------------------------------- /patches/al2/2.26/mw-0004-Consolidate-link-map-sorting.patch: -------------------------------------------------------------------------------- 1 | From 8a0bc360b2dfc015fc53d7168e8bd8d52c3877c9 Mon Sep 17 00:00:00 2001 2 | From: Andreas Schwab 3 | Date: Tue, 7 Nov 2017 15:24:19 +0100 4 | Subject: [PATCH 4/5] Consolidate link map sorting 5 | 6 | Combine the four places where link maps are sorted into a single function. 7 | This also moves the logic to skip the first map (representing the main 8 | binary) to the callers. 9 | 10 | Mathworks backport to glibc-2.26 for AL2: 11 | 12 | Backported c2c299fd24e8 to glibc-2.26 on top of a0bc5dd3be, which is the 13 | glibc commit used as the base for the AL2 package, plus a ton of other 14 | patches from the AL2 SRPM. 
This patch is required to be able to apply 15 | another patch to fix BZ17645. 16 | 17 | Coded-by: Mike Gulick 18 | --- 19 | ChangeLog | 13 ++++ 20 | elf/Makefile | 2 +- 21 | elf/dl-close.c | 6 +- 22 | elf/dl-deps.c | 59 +----------------- 23 | elf/dl-fini.c | 106 ++------------------------------ 24 | elf/dl-open.c | 57 +---------------- 25 | elf/dl-sort-maps.c | 122 +++++++++++++++++++++++++++++++++++++ 26 | sysdeps/generic/ldsodefs.h | 18 +++++- 27 | 8 files changed, 166 insertions(+), 217 deletions(-) 28 | create mode 100644 elf/dl-sort-maps.c 29 | 30 | diff --git a/ChangeLog b/ChangeLog 31 | index 005225efac..92565e08be 100644 32 | --- a/ChangeLog 33 | +++ b/ChangeLog 34 | @@ -1,3 +1,16 @@ 35 | +2017-11-27 Andreas Schwab 36 | + 37 | + * elf/Makefile (dl-routines): Add dl-sort-maps. 38 | + * elf/dl-sort-maps.c: New file. 39 | + * sysdeps/generic/ldsodefs.h (_dl_sort_fini): Don't declare. 40 | + (_dl_sort_maps): Declare. 41 | + * elf/dl-fini.c (_dl_sort_fini): Remove. 42 | + (_dl_fini): Use _dl_sort_maps instead of _dl_sort_fini. 43 | + * elf/dl-close.c (_dl_close_worker): Likewise. 44 | + * elf/dl-deps.c (_dl_map_object_deps): Use _dl_sort_maps instead of 45 | + open-coding it. 46 | + * elf/dl-open.c (dl_open_worker): Likewise. 
47 | + 48 | 2019-07-10 Szabolcs Nagy 49 | 50 | * sysdeps/aarch64/dl-machine.h (elf_machine_runtime_setup): Remove the 51 | diff --git a/elf/Makefile b/elf/Makefile 52 | index bf0bc3fe6f..4beefd0415 100644 53 | --- a/elf/Makefile 54 | +++ b/elf/Makefile 55 | @@ -31,7 +31,7 @@ routines = $(all-dl-routines) dl-support dl-iteratephdr \ 56 | dl-routines = $(addprefix dl-,load lookup object reloc deps hwcaps \ 57 | runtime init fini debug misc \ 58 | version profile tls origin scope \ 59 | - execstack caller open close trampoline) 60 | + execstack caller open close trampoline sort-maps) 61 | ifeq (yes,$(use-ldconfig)) 62 | dl-routines += dl-cache 63 | endif 64 | diff --git a/elf/dl-close.c b/elf/dl-close.c 65 | index 2b46b7cf8b..3dd75c8725 100644 66 | --- a/elf/dl-close.c 67 | +++ b/elf/dl-close.c 68 | @@ -241,8 +241,10 @@ _dl_close_worker (struct link_map *map, bool force) 69 | } 70 | } 71 | 72 | - /* Sort the entries. */ 73 | - _dl_sort_fini (maps, nloaded, used, nsid); 74 | + /* Sort the entries. We can skip looking for the binary itself which is 75 | + at the front of the search list for the main namespace. */ 76 | + _dl_sort_maps (maps + (nsid == LM_ID_BASE), nloaded - (nsid == LM_ID_BASE), 77 | + used + (nsid == LM_ID_BASE), true); 78 | 79 | /* Call all termination functions at once. */ 80 | #ifdef SHARED 81 | diff --git a/elf/dl-deps.c b/elf/dl-deps.c 82 | index bc59f0ff7b..7f8e228c17 100644 83 | --- a/elf/dl-deps.c 84 | +++ b/elf/dl-deps.c 85 | @@ -614,62 +614,9 @@ Filters not supported with LD_TRACE_PRELINKING")); 86 | itself will always be initialize last. */ 87 | memcpy (l_initfini, map->l_searchlist.r_list, 88 | nlist * sizeof (struct link_map *)); 89 | - if (__glibc_likely (nlist > 1)) 90 | - { 91 | - /* We can skip looking for the binary itself which is at the front 92 | - of the search list. 
*/ 93 | - i = 1; 94 | - uint16_t seen[nlist]; 95 | - memset (seen, 0, nlist * sizeof (seen[0])); 96 | - while (1) 97 | - { 98 | - /* Keep track of which object we looked at this round. */ 99 | - ++seen[i]; 100 | - struct link_map *thisp = l_initfini[i]; 101 | - 102 | - /* Find the last object in the list for which the current one is 103 | - a dependency and move the current object behind the object 104 | - with the dependency. */ 105 | - unsigned int k = nlist - 1; 106 | - while (k > i) 107 | - { 108 | - struct link_map **runp = l_initfini[k]->l_initfini; 109 | - if (runp != NULL) 110 | - /* Look through the dependencies of the object. */ 111 | - while (*runp != NULL) 112 | - if (__glibc_unlikely (*runp++ == thisp)) 113 | - { 114 | - /* Move the current object to the back past the last 115 | - object with it as the dependency. */ 116 | - memmove (&l_initfini[i], &l_initfini[i + 1], 117 | - (k - i) * sizeof (l_initfini[0])); 118 | - l_initfini[k] = thisp; 119 | - 120 | - if (seen[i + 1] > nlist - i) 121 | - { 122 | - ++i; 123 | - goto next_clear; 124 | - } 125 | - 126 | - uint16_t this_seen = seen[i]; 127 | - memmove (&seen[i], &seen[i + 1], 128 | - (k - i) * sizeof (seen[0])); 129 | - seen[k] = this_seen; 130 | - 131 | - goto next; 132 | - } 133 | - 134 | - --k; 135 | - } 136 | - 137 | - if (++i == nlist) 138 | - break; 139 | - next_clear: 140 | - memset (&seen[i], 0, (nlist - i) * sizeof (seen[0])); 141 | - 142 | - next:; 143 | - } 144 | - } 145 | + /* We can skip looking for the binary itself which is at the front of 146 | + the search list. */ 147 | + _dl_sort_maps (&l_initfini[1], nlist - 1, NULL, false); 148 | 149 | /* Terminate the list of dependencies. 
*/ 150 | l_initfini[nlist] = NULL; 151 | diff --git a/elf/dl-fini.c b/elf/dl-fini.c 152 | index 93b337bea1..cd3266e8f9 100644 153 | --- a/elf/dl-fini.c 154 | +++ b/elf/dl-fini.c 155 | @@ -25,105 +25,6 @@ 156 | typedef void (*fini_t) (void); 157 | 158 | 159 | -void 160 | -internal_function 161 | -_dl_sort_fini (struct link_map **maps, size_t nmaps, char *used, Lmid_t ns) 162 | -{ 163 | - /* A list of one element need not be sorted. */ 164 | - if (nmaps == 1) 165 | - return; 166 | - 167 | - /* We can skip looking for the binary itself which is at the front 168 | - of the search list for the main namespace. */ 169 | - unsigned int i = ns == LM_ID_BASE; 170 | - uint16_t seen[nmaps]; 171 | - memset (seen, 0, nmaps * sizeof (seen[0])); 172 | - while (1) 173 | - { 174 | - /* Keep track of which object we looked at this round. */ 175 | - ++seen[i]; 176 | - struct link_map *thisp = maps[i]; 177 | - 178 | - /* Do not handle ld.so in secondary namespaces and object which 179 | - are not removed. */ 180 | - if (thisp != thisp->l_real || thisp->l_idx == -1) 181 | - goto skip; 182 | - 183 | - /* Find the last object in the list for which the current one is 184 | - a dependency and move the current object behind the object 185 | - with the dependency. */ 186 | - unsigned int k = nmaps - 1; 187 | - while (k > i) 188 | - { 189 | - struct link_map **runp = maps[k]->l_initfini; 190 | - if (runp != NULL) 191 | - /* Look through the dependencies of the object. */ 192 | - while (*runp != NULL) 193 | - if (__glibc_unlikely (*runp++ == thisp)) 194 | - { 195 | - move: 196 | - /* Move the current object to the back past the last 197 | - object with it as the dependency. 
*/ 198 | - memmove (&maps[i], &maps[i + 1], 199 | - (k - i) * sizeof (maps[0])); 200 | - maps[k] = thisp; 201 | - 202 | - if (used != NULL) 203 | - { 204 | - char here_used = used[i]; 205 | - memmove (&used[i], &used[i + 1], 206 | - (k - i) * sizeof (used[0])); 207 | - used[k] = here_used; 208 | - } 209 | - 210 | - if (seen[i + 1] > nmaps - i) 211 | - { 212 | - ++i; 213 | - goto next_clear; 214 | - } 215 | - 216 | - uint16_t this_seen = seen[i]; 217 | - memmove (&seen[i], &seen[i + 1], (k - i) * sizeof (seen[0])); 218 | - seen[k] = this_seen; 219 | - 220 | - goto next; 221 | - } 222 | - 223 | - if (__glibc_unlikely (maps[k]->l_reldeps != NULL)) 224 | - { 225 | - unsigned int m = maps[k]->l_reldeps->act; 226 | - struct link_map **relmaps = &maps[k]->l_reldeps->list[0]; 227 | - 228 | - /* Look through the relocation dependencies of the object. */ 229 | - while (m-- > 0) 230 | - if (__glibc_unlikely (relmaps[m] == thisp)) 231 | - { 232 | - /* If a cycle exists with a link time dependency, 233 | - preserve the latter. */ 234 | - struct link_map **runp = thisp->l_initfini; 235 | - if (runp != NULL) 236 | - while (*runp != NULL) 237 | - if (__glibc_unlikely (*runp++ == maps[k])) 238 | - goto ignore; 239 | - goto move; 240 | - } 241 | - ignore:; 242 | - } 243 | - 244 | - --k; 245 | - } 246 | - 247 | - skip: 248 | - if (++i == nmaps) 249 | - break; 250 | - next_clear: 251 | - memset (&seen[i], 0, (nmaps - i) * sizeof (seen[0])); 252 | - 253 | - next:; 254 | - } 255 | -} 256 | - 257 | - 258 | void 259 | internal_function 260 | _dl_fini (void) 261 | @@ -188,8 +89,11 @@ _dl_fini (void) 262 | assert (ns == LM_ID_BASE || i == nloaded || i == nloaded - 1); 263 | unsigned int nmaps = i; 264 | 265 | - /* Now we have to do the sorting. */ 266 | - _dl_sort_fini (maps, nmaps, NULL, ns); 267 | + /* Now we have to do the sorting. We can skip looking for the 268 | + binary itself which is at the front of the search list for 269 | + the main namespace. 
*/ 270 | + _dl_sort_maps (maps + (ns == LM_ID_BASE), nmaps - (ns == LM_ID_BASE), 271 | + NULL, true); 272 | 273 | /* We do not rely on the linked list of loaded object anymore 274 | from this point on. We have our own list here (maps). The 275 | diff --git a/elf/dl-open.c b/elf/dl-open.c 276 | index a45319e5fc..6524c3cc63 100644 277 | --- a/elf/dl-open.c 278 | +++ b/elf/dl-open.c 279 | @@ -313,7 +313,7 @@ dl_open_worker (void *a) 280 | /* Sort the objects by dependency for the relocation process. This 281 | allows IFUNC relocations to work and it also means copy 282 | relocation of dependencies are if necessary overwritten. */ 283 | - size_t nmaps = 0; 284 | + unsigned int nmaps = 0; 285 | struct link_map *l = new; 286 | do 287 | { 288 | @@ -332,62 +332,11 @@ dl_open_worker (void *a) 289 | l = l->l_next; 290 | } 291 | while (l != NULL); 292 | - if (nmaps > 1) 293 | - { 294 | - uint16_t seen[nmaps]; 295 | - memset (seen, '\0', sizeof (seen)); 296 | - size_t i = 0; 297 | - while (1) 298 | - { 299 | - ++seen[i]; 300 | - struct link_map *thisp = maps[i]; 301 | - 302 | - /* Find the last object in the list for which the current one is 303 | - a dependency and move the current object behind the object 304 | - with the dependency. */ 305 | - size_t k = nmaps - 1; 306 | - while (k > i) 307 | - { 308 | - struct link_map **runp = maps[k]->l_initfini; 309 | - if (runp != NULL) 310 | - /* Look through the dependencies of the object. */ 311 | - while (*runp != NULL) 312 | - if (__glibc_unlikely (*runp++ == thisp)) 313 | - { 314 | - /* Move the current object to the back past the last 315 | - object with it as the dependency. 
*/ 316 | - memmove (&maps[i], &maps[i + 1], 317 | - (k - i) * sizeof (maps[0])); 318 | - maps[k] = thisp; 319 | - 320 | - if (seen[i + 1] > nmaps - i) 321 | - { 322 | - ++i; 323 | - goto next_clear; 324 | - } 325 | - 326 | - uint16_t this_seen = seen[i]; 327 | - memmove (&seen[i], &seen[i + 1], 328 | - (k - i) * sizeof (seen[0])); 329 | - seen[k] = this_seen; 330 | - 331 | - goto next; 332 | - } 333 | - 334 | - --k; 335 | - } 336 | - 337 | - if (++i == nmaps) 338 | - break; 339 | - next_clear: 340 | - memset (&seen[i], 0, (nmaps - i) * sizeof (seen[0])); 341 | - next:; 342 | - } 343 | - } 344 | + _dl_sort_maps (maps, nmaps, NULL, false); 345 | 346 | int relocation_in_progress = 0; 347 | 348 | - for (size_t i = nmaps; i-- > 0; ) 349 | + for (unsigned int i = nmaps; i-- > 0; ) 350 | { 351 | l = maps[i]; 352 | 353 | diff --git a/elf/dl-sort-maps.c b/elf/dl-sort-maps.c 354 | new file mode 100644 355 | index 0000000000..416e8904ad 356 | --- /dev/null 357 | +++ b/elf/dl-sort-maps.c 358 | @@ -0,0 +1,122 @@ 359 | +/* Sort array of link maps according to dependencies. 360 | + Copyright (C) 2017 Free Software Foundation, Inc. 361 | + This file is part of the GNU C Library. 362 | + 363 | + The GNU C Library is free software; you can redistribute it and/or 364 | + modify it under the terms of the GNU Lesser General Public 365 | + License as published by the Free Software Foundation; either 366 | + version 2.1 of the License, or (at your option) any later version. 367 | + 368 | + The GNU C Library is distributed in the hope that it will be useful, 369 | + but WITHOUT ANY WARRANTY; without even the implied warranty of 370 | + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 371 | + Lesser General Public License for more details. 372 | + 373 | + You should have received a copy of the GNU Lesser General Public 374 | + License along with the GNU C Library; if not, see 375 | + . 
*/ 376 | + 377 | +#include 378 | + 379 | + 380 | +/* Sort array MAPS according to dependencies of the contained objects. 381 | + Array USED, if non-NULL, is permutated along MAPS. If FOR_FINI this is 382 | + called for finishing an object. */ 383 | +void 384 | +_dl_sort_maps (struct link_map **maps, unsigned int nmaps, char *used, 385 | + bool for_fini) 386 | +{ 387 | + /* A list of one element need not be sorted. */ 388 | + if (nmaps <= 1) 389 | + return; 390 | + 391 | + unsigned int i = 0; 392 | + uint16_t seen[nmaps]; 393 | + memset (seen, 0, nmaps * sizeof (seen[0])); 394 | + while (1) 395 | + { 396 | + /* Keep track of which object we looked at this round. */ 397 | + ++seen[i]; 398 | + struct link_map *thisp = maps[i]; 399 | + 400 | + if (__glibc_unlikely (for_fini)) 401 | + { 402 | + /* Do not handle ld.so in secondary namespaces and objects which 403 | + are not removed. */ 404 | + if (thisp != thisp->l_real || thisp->l_idx == -1) 405 | + goto skip; 406 | + } 407 | + 408 | + /* Find the last object in the list for which the current one is 409 | + a dependency and move the current object behind the object 410 | + with the dependency. */ 411 | + unsigned int k = nmaps - 1; 412 | + while (k > i) 413 | + { 414 | + struct link_map **runp = maps[k]->l_initfini; 415 | + if (runp != NULL) 416 | + /* Look through the dependencies of the object. */ 417 | + while (*runp != NULL) 418 | + if (__glibc_unlikely (*runp++ == thisp)) 419 | + { 420 | + move: 421 | + /* Move the current object to the back past the last 422 | + object with it as the dependency. 
*/ 423 | + memmove (&maps[i], &maps[i + 1], 424 | + (k - i) * sizeof (maps[0])); 425 | + maps[k] = thisp; 426 | + 427 | + if (used != NULL) 428 | + { 429 | + char here_used = used[i]; 430 | + memmove (&used[i], &used[i + 1], 431 | + (k - i) * sizeof (used[0])); 432 | + used[k] = here_used; 433 | + } 434 | + 435 | + if (seen[i + 1] > nmaps - i) 436 | + { 437 | + ++i; 438 | + goto next_clear; 439 | + } 440 | + 441 | + uint16_t this_seen = seen[i]; 442 | + memmove (&seen[i], &seen[i + 1], (k - i) * sizeof (seen[0])); 443 | + seen[k] = this_seen; 444 | + 445 | + goto next; 446 | + } 447 | + 448 | + if (__glibc_unlikely (for_fini && maps[k]->l_reldeps != NULL)) 449 | + { 450 | + unsigned int m = maps[k]->l_reldeps->act; 451 | + struct link_map **relmaps = &maps[k]->l_reldeps->list[0]; 452 | + 453 | + /* Look through the relocation dependencies of the object. */ 454 | + while (m-- > 0) 455 | + if (__glibc_unlikely (relmaps[m] == thisp)) 456 | + { 457 | + /* If a cycle exists with a link time dependency, 458 | + preserve the latter. */ 459 | + struct link_map **runp = thisp->l_initfini; 460 | + if (runp != NULL) 461 | + while (*runp != NULL) 462 | + if (__glibc_unlikely (*runp++ == maps[k])) 463 | + goto ignore; 464 | + goto move; 465 | + } 466 | + ignore:; 467 | + } 468 | + 469 | + --k; 470 | + } 471 | + 472 | + skip: 473 | + if (++i == nmaps) 474 | + break; 475 | + next_clear: 476 | + memset (&seen[i], 0, (nmaps - i) * sizeof (seen[0])); 477 | + 478 | + next:; 479 | + } 480 | +} 481 | diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h 482 | index 4508365871..b15b3aad34 100644 483 | --- a/sysdeps/generic/ldsodefs.h 484 | +++ b/sysdeps/generic/ldsodefs.h 485 | @@ -908,9 +908,21 @@ extern void _dl_init (struct link_map *main_map, int argc, char **argv, 486 | extern void _dl_fini (void) internal_function; 487 | 488 | /* Sort array MAPS according to dependencies of the contained objects. 
*/ 489 | -extern void _dl_sort_fini (struct link_map **maps, size_t nmaps, char *used, 490 | - Lmid_t ns) 491 | - internal_function attribute_hidden; 492 | +/* Mike Gulick (MathWorks) (2025-01-15) When backporting c2c299fd24 to 493 | + glibc-2.26, not sure if this function should have 'internal_function 494 | + attribute_hidden' or just 'attribute_hidden'. The 'internal_function' 495 | + attribute was removed between 2.26 and 2.27 in b3f85fd2e4, however in 2.26 496 | + the previous version of this function, _dl_sort_fini, had the 497 | + internal_function and attribute_hidden attributes. I think this shouldn't 498 | + matter because according to the comment in 9fa7449b35, the internal_function 499 | + attribute only applies to i386 builds. 500 | +*/ 501 | +/* 502 | +extern void _dl_sort_maps (struct link_map **maps, unsigned int nmaps, 503 | + char *used, bool for_fini) attribute_hidden; 504 | +*/ 505 | +extern void _dl_sort_maps (struct link_map **maps, unsigned int nmaps, 506 | + char *used, bool for_fini) internal_function attribute_hidden; 507 | 508 | /* The dynamic linker calls this function before and having changing 509 | any shared object mappings. The `r_state' member of `struct r_debug' 510 | -- 511 | 2.39.5 512 | 513 | -------------------------------------------------------------------------------- /patches/al2/2.26/mw-0005-Backport-BZ17645-patch-to-glibc-2.28.patch: -------------------------------------------------------------------------------- 1 | From 6fbd831dbfc44e98aafbdeff72f200a9354a4a6d Mon Sep 17 00:00:00 2001 2 | From: Mike Gulick 3 | Date: Wed, 15 Jan 2025 18:25:41 -0500 4 | Subject: [PATCH 5/5] Backport BZ17645 patch to glibc-2.28 5 | 6 | This second patch contains the actual implementation of a new sorting algorithm 7 | for shared objects in the dynamic loader, which solves the slow behavior that 8 | the current "old" algorithm falls into when the DSO set contains circular 9 | dependencies. 
10 | 11 | The new algorithm implemented here is simply depth-first search (DFS) to obtain 12 | the Reverse-Post Order (RPO) sequence, a topological sort. A new l_visited:1 13 | bitfield is added to struct link_map to more elegantly facilitate such a search. 14 | 15 | The DFS algorithm is applied to the input maps[nmap-1] backwards towards 16 | maps[0]. This has the effect of a more "shallow" recursion depth in general 17 | since the input is in BFS. Also, when combined with the natural order of 18 | processing l_initfini[] at each node, this creates a resulting output sorting 19 | closer to the intuitive "left-to-right" order in most cases. 20 | 21 | Another notable implementation adjustment related to this _dl_sort_maps change 22 | is the removing of two char arrays 'used' and 'done' in _dl_close_worker to 23 | represent two per-map attributes. This has been changed to simply use two new 24 | bit-fields l_map_used:1, l_map_done:1 added to struct link_map. This also allows 25 | discarding the clunky 'used' array sorting that _dl_sort_maps had to sometimes 26 | do along the way. 27 | 28 | Tunable support for switching between different sorting algorithms at runtime is 29 | also added. A new tunable 'glibc.rtld.dynamic_sort' with current valid values 1 30 | (old algorithm) and 2 (new DFS algorithm) has been added. At time of commit 31 | of this patch, the default setting is 1 (old algorithm). 
32 | 33 | Signed-off-by: Chung-Lin Tang 34 | Reviewed-by: Adhemerval Zanella 35 | 36 | Mathworks backport to glibc 2.28 for RHEL, remove tunable support and set default to the DFS sort map behavior 37 | Coded-by: Alan Li 38 | 39 | Mathworks backport to glibc 2.26 for AL2 40 | Coded-by: Mike Gulick 41 | --- 42 | elf/dl-close.c | 36 +++--- 43 | elf/dl-deps.c | 2 +- 44 | elf/dl-fini.c | 3 +- 45 | elf/dl-open.c | 2 +- 46 | elf/dl-sort-maps.c | 241 +++++++++++++++++++++++-------------- 47 | elf/rtld.c | 3 + 48 | include/link.h | 5 + 49 | sysdeps/generic/ldsodefs.h | 2 +- 50 | 8 files changed, 179 insertions(+), 115 deletions(-) 51 | 52 | diff --git a/elf/dl-close.c b/elf/dl-close.c 53 | index 3dd75c8725..675b46d56a 100644 54 | --- a/elf/dl-close.c 55 | +++ b/elf/dl-close.c 56 | @@ -140,8 +140,6 @@ _dl_close_worker (struct link_map *map, bool force) 57 | 58 | bool any_tls = false; 59 | const unsigned int nloaded = ns->_ns_nloaded; 60 | - char used[nloaded]; 61 | - char done[nloaded]; 62 | struct link_map *maps[nloaded]; 63 | 64 | /* Clear DF_1_NODELETE to force object deletion. We don't need to touch 65 | @@ -157,24 +155,21 @@ _dl_close_worker (struct link_map *map, bool force) 66 | int idx = 0; 67 | for (struct link_map *l = ns->_ns_loaded; l != NULL; l = l->l_next) 68 | { 69 | - l->l_idx = idx; 70 | + l->l_map_used = 0; 71 | + l->l_map_done = 0; 72 | + l->l_idx = idx; 73 | maps[idx] = l; 74 | ++idx; 75 | - 76 | } 77 | assert (idx == nloaded); 78 | 79 | - /* Prepare the bitmaps. */ 80 | - memset (used, '\0', sizeof (used)); 81 | - memset (done, '\0', sizeof (done)); 82 | - 83 | /* Keep track of the lowest index link map we have covered already. */ 84 | int done_index = -1; 85 | while (++done_index < nloaded) 86 | { 87 | struct link_map *l = maps[done_index]; 88 | 89 | - if (done[done_index]) 90 | + if (l->l_map_done) 91 | /* Already handled. 
*/ 92 | continue; 93 | 94 | @@ -185,12 +180,12 @@ _dl_close_worker (struct link_map *map, bool force) 95 | /* See CONCURRENCY NOTES in cxa_thread_atexit_impl.c to know why 96 | acquire is sufficient and correct. */ 97 | && atomic_load_acquire (&l->l_tls_dtor_count) == 0 98 | - && !used[done_index]) 99 | + && !l->l_map_used) 100 | continue; 101 | 102 | /* We need this object and we handle it now. */ 103 | - done[done_index] = 1; 104 | - used[done_index] = 1; 105 | + l->l_map_used = 1; 106 | + l->l_map_done = 1; 107 | /* Signal the object is still needed. */ 108 | l->l_idx = IDX_STILL_USED; 109 | 110 | @@ -206,9 +201,9 @@ _dl_close_worker (struct link_map *map, bool force) 111 | { 112 | assert ((*lp)->l_idx >= 0 && (*lp)->l_idx < nloaded); 113 | 114 | - if (!used[(*lp)->l_idx]) 115 | + if (!(*lp)->l_map_used) 116 | { 117 | - used[(*lp)->l_idx] = 1; 118 | + (*lp)->l_map_used = 1; 119 | /* If we marked a new object as used, and we've 120 | already processed it, then we need to go back 121 | and process again from that point forward to 122 | @@ -231,9 +226,9 @@ _dl_close_worker (struct link_map *map, bool force) 123 | { 124 | assert (jmap->l_idx >= 0 && jmap->l_idx < nloaded); 125 | 126 | - if (!used[jmap->l_idx]) 127 | + if (!jmap->l_map_used) 128 | { 129 | - used[jmap->l_idx] = 1; 130 | + jmap->l_map_used = 1; 131 | if (jmap->l_idx - 1 < done_index) 132 | done_index = jmap->l_idx - 1; 133 | } 134 | @@ -243,8 +238,7 @@ _dl_close_worker (struct link_map *map, bool force) 135 | 136 | /* Sort the entries. We can skip looking for the binary itself which is 137 | at the front of the search list for the main namespace. */ 138 | - _dl_sort_maps (maps + (nsid == LM_ID_BASE), nloaded - (nsid == LM_ID_BASE), 139 | - used + (nsid == LM_ID_BASE), true); 140 | + _dl_sort_maps (maps, nloaded, (nsid == LM_ID_BASE), true); 141 | 142 | /* Call all termination functions at once. 
*/ 143 | #ifdef SHARED 144 | @@ -261,7 +255,7 @@ _dl_close_worker (struct link_map *map, bool force) 145 | /* All elements must be in the same namespace. */ 146 | assert (imap->l_ns == nsid); 147 | 148 | - if (!used[i]) 149 | + if (!imap->l_map_used) 150 | { 151 | assert (imap->l_type == lt_loaded 152 | && (imap->l_flags_1 & DF_1_NODELETE) == 0); 153 | @@ -323,7 +317,7 @@ _dl_close_worker (struct link_map *map, bool force) 154 | if (i < first_loaded) 155 | first_loaded = i; 156 | } 157 | - /* Else used[i]. */ 158 | + /* Else imap->l_map_used. */ 159 | else if (imap->l_type == lt_loaded) 160 | { 161 | struct r_scope_elem *new_list = NULL; 162 | @@ -544,7 +538,7 @@ _dl_close_worker (struct link_map *map, bool force) 163 | for (unsigned int i = first_loaded; i < nloaded; ++i) 164 | { 165 | struct link_map *imap = maps[i]; 166 | - if (!used[i]) 167 | + if (!imap->l_map_used) 168 | { 169 | assert (imap->l_type == lt_loaded); 170 | 171 | diff --git a/elf/dl-deps.c b/elf/dl-deps.c 172 | index 7f8e228c17..4f9ee242ae 100644 173 | --- a/elf/dl-deps.c 174 | +++ b/elf/dl-deps.c 175 | @@ -616,7 +616,7 @@ Filters not supported with LD_TRACE_PRELINKING")); 176 | nlist * sizeof (struct link_map *)); 177 | /* We can skip looking for the binary itself which is at the front of 178 | the search list. */ 179 | - _dl_sort_maps (&l_initfini[1], nlist - 1, NULL, false); 180 | + _dl_sort_maps (l_initfini, nlist, true, false); 181 | 182 | /* Terminate the list of dependencies. */ 183 | l_initfini[nlist] = NULL; 184 | diff --git a/elf/dl-fini.c b/elf/dl-fini.c 185 | index cd3266e8f9..e2d41eff23 100644 186 | --- a/elf/dl-fini.c 187 | +++ b/elf/dl-fini.c 188 | @@ -92,8 +92,7 @@ _dl_fini (void) 189 | /* Now we have to do the sorting. We can skip looking for the 190 | binary itself which is at the front of the search list for 191 | the main namespace. 
*/ 192 | - _dl_sort_maps (maps + (ns == LM_ID_BASE), nmaps - (ns == LM_ID_BASE), 193 | - NULL, true); 194 | + _dl_sort_maps (maps, nmaps, (ns == LM_ID_BASE), true); 195 | 196 | /* We do not rely on the linked list of loaded object anymore 197 | from this point on. We have our own list here (maps). The 198 | diff --git a/elf/dl-open.c b/elf/dl-open.c 199 | index 6524c3cc63..565205a980 100644 200 | --- a/elf/dl-open.c 201 | +++ b/elf/dl-open.c 202 | @@ -332,7 +332,7 @@ dl_open_worker (void *a) 203 | l = l->l_next; 204 | } 205 | while (l != NULL); 206 | - _dl_sort_maps (maps, nmaps, NULL, false); 207 | + _dl_sort_maps (maps, nmaps, false, false); 208 | 209 | int relocation_in_progress = 0; 210 | 211 | diff --git a/elf/dl-sort-maps.c b/elf/dl-sort-maps.c 212 | index 416e8904ad..c1d6807b96 100644 213 | --- a/elf/dl-sort-maps.c 214 | +++ b/elf/dl-sort-maps.c 215 | @@ -14,109 +14,172 @@ 216 | 217 | You should have received a copy of the GNU Lesser General Public 218 | License along with the GNU C Library; if not, see 219 | - . */ 220 | + . */ 221 | 222 | +#include 223 | #include 224 | 225 | 226 | -/* Sort array MAPS according to dependencies of the contained objects. 227 | - Array USED, if non-NULL, is permutated along MAPS. If FOR_FINI this is 228 | - called for finishing an object. */ 229 | -void 230 | -_dl_sort_maps (struct link_map **maps, unsigned int nmaps, char *used, 231 | - bool for_fini) 232 | +/* We use a recursive function due to its better clarity and ease of 233 | + implementation, as well as faster execution speed. We already use 234 | + alloca() for list allocation during the breadth-first search of 235 | + dependencies in _dl_map_object_deps(), and this should be on the 236 | + same order of worst-case stack usage. 237 | + 238 | + Note: the '*rpo' parameter is supposed to point to one past the 239 | + last element of the array where we save the sort results, and is 240 | + decremented before storing the current map at each level. 
*/ 241 | + 242 | +static void 243 | +dfs_traversal (struct link_map ***rpo, struct link_map *map, 244 | + bool *do_reldeps) 245 | { 246 | - /* A list of one element need not be sorted. */ 247 | - if (nmaps <= 1) 248 | + /* _dl_map_object_deps ignores l_faked objects when calculating the 249 | + number of maps before calling _dl_sort_maps, ignore them as well. */ 250 | + if (map->l_visited || map->l_faked) 251 | return; 252 | 253 | - unsigned int i = 0; 254 | - uint16_t seen[nmaps]; 255 | - memset (seen, 0, nmaps * sizeof (seen[0])); 256 | - while (1) 257 | - { 258 | - /* Keep track of which object we looked at this round. */ 259 | - ++seen[i]; 260 | - struct link_map *thisp = maps[i]; 261 | + map->l_visited = 1; 262 | 263 | - if (__glibc_unlikely (for_fini)) 264 | + if (map->l_initfini) 265 | + { 266 | + for (int i = 0; map->l_initfini[i] != NULL; i++) 267 | { 268 | - /* Do not handle ld.so in secondary namespaces and objects which 269 | - are not removed. */ 270 | - if (thisp != thisp->l_real || thisp->l_idx == -1) 271 | - goto skip; 272 | + struct link_map *dep = map->l_initfini[i]; 273 | + if (dep->l_visited == 0 274 | + && dep->l_main_map == 0) 275 | + dfs_traversal (rpo, dep, do_reldeps); 276 | } 277 | + } 278 | 279 | - /* Find the last object in the list for which the current one is 280 | - a dependency and move the current object behind the object 281 | - with the dependency. */ 282 | - unsigned int k = nmaps - 1; 283 | - while (k > i) 284 | + if (__glibc_unlikely (do_reldeps != NULL && map->l_reldeps != NULL)) 285 | + { 286 | + /* Indicate that we encountered relocation dependencies during 287 | + traversal. */ 288 | + *do_reldeps = true; 289 | + 290 | + for (int m = map->l_reldeps->act - 1; m >= 0; m--) 291 | { 292 | - struct link_map **runp = maps[k]->l_initfini; 293 | - if (runp != NULL) 294 | - /* Look through the dependencies of the object. 
*/ 295 | - while (*runp != NULL) 296 | - if (__glibc_unlikely (*runp++ == thisp)) 297 | - { 298 | - move: 299 | - /* Move the current object to the back past the last 300 | - object with it as the dependency. */ 301 | - memmove (&maps[i], &maps[i + 1], 302 | - (k - i) * sizeof (maps[0])); 303 | - maps[k] = thisp; 304 | - 305 | - if (used != NULL) 306 | - { 307 | - char here_used = used[i]; 308 | - memmove (&used[i], &used[i + 1], 309 | - (k - i) * sizeof (used[0])); 310 | - used[k] = here_used; 311 | - } 312 | - 313 | - if (seen[i + 1] > nmaps - i) 314 | - { 315 | - ++i; 316 | - goto next_clear; 317 | - } 318 | - 319 | - uint16_t this_seen = seen[i]; 320 | - memmove (&seen[i], &seen[i + 1], (k - i) * sizeof (seen[0])); 321 | - seen[k] = this_seen; 322 | - 323 | - goto next; 324 | - } 325 | - 326 | - if (__glibc_unlikely (for_fini && maps[k]->l_reldeps != NULL)) 327 | - { 328 | - unsigned int m = maps[k]->l_reldeps->act; 329 | - struct link_map **relmaps = &maps[k]->l_reldeps->list[0]; 330 | - 331 | - /* Look through the relocation dependencies of the object. */ 332 | - while (m-- > 0) 333 | - if (__glibc_unlikely (relmaps[m] == thisp)) 334 | - { 335 | - /* If a cycle exists with a link time dependency, 336 | - preserve the latter. */ 337 | - struct link_map **runp = thisp->l_initfini; 338 | - if (runp != NULL) 339 | - while (*runp != NULL) 340 | - if (__glibc_unlikely (*runp++ == maps[k])) 341 | - goto ignore; 342 | - goto move; 343 | - } 344 | - ignore:; 345 | - } 346 | - 347 | - --k; 348 | + struct link_map *dep = map->l_reldeps->list[m]; 349 | + if (dep->l_visited == 0 350 | + && dep->l_main_map == 0) 351 | + dfs_traversal (rpo, dep, do_reldeps); 352 | } 353 | + } 354 | + 355 | + *rpo -= 1; 356 | + **rpo = map; 357 | +} 358 | 359 | - skip: 360 | - if (++i == nmaps) 361 | - break; 362 | - next_clear: 363 | - memset (&seen[i], 0, (nmaps - i) * sizeof (seen[0])); 364 | +/* Topologically sort array MAPS according to dependencies of the contained 365 | + objects. 
*/ 366 | 367 | - next:; 368 | +static void 369 | +_dl_sort_maps_dfs (struct link_map **maps, unsigned int nmaps, 370 | + unsigned int skip __attribute__ ((unused)), bool for_fini) 371 | +{ 372 | + for (int i = nmaps - 1; i >= 0; i--) 373 | + maps[i]->l_visited = 0; 374 | + 375 | + /* We apply DFS traversal for each of maps[i] until the whole total order 376 | + is found and we're at the start of the Reverse-Postorder (RPO) sequence, 377 | + which is a topological sort. 378 | + 379 | + We go from maps[nmaps - 1] backwards towards maps[0] at this level. 380 | + Due to the breadth-first search (BFS) ordering we receive, going 381 | + backwards usually gives a more shallow depth-first recursion depth, 382 | + adding more stack usage safety. Also, combined with the natural 383 | + processing order of l_initfini[] at each node during DFS, this maintains 384 | + an ordering closer to the original link ordering in the sorting results 385 | + under most simpler cases. 386 | + 387 | + Another reason we order the top level backwards, it that maps[0] is 388 | + usually exactly the main object of which we're in the midst of 389 | + _dl_map_object_deps() processing, and maps[0]->l_initfini[] is still 390 | + blank. If we start the traversal from maps[0], since having no 391 | + dependencies yet filled in, maps[0] will always be immediately 392 | + incorrectly placed at the last place in the order (first in reverse). 393 | + Adjusting the order so that maps[0] is last traversed naturally avoids 394 | + this problem. 395 | + 396 | + Further, the old "optimization" of skipping the main object at maps[0] 397 | + from the call-site (i.e. _dl_sort_maps(maps+1,nmaps-1)) is in general 398 | + no longer valid, since traversing along object dependency-links 399 | + may "find" the main object even when it is not included in the initial 400 | + order (e.g. a dlopen()'ed shared object can have circular dependencies 401 | + linked back to itself). 
In such a case, traversing N-1 objects will 402 | + create a N-object result, and raise problems. 403 | + 404 | + To summarize, just passing in the full list, and iterating from back 405 | + to front makes things much more straightforward. */ 406 | + 407 | + /* Array to hold RPO sorting results, before we copy back to maps[]. */ 408 | + struct link_map *rpo[nmaps]; 409 | + 410 | + /* The 'head' position during each DFS iteration. Note that we start at 411 | + one past the last element due to first-decrement-then-store (see the 412 | + bottom of above dfs_traversal() routine). */ 413 | + struct link_map **rpo_head = &rpo[nmaps]; 414 | + 415 | + bool do_reldeps = false; 416 | + bool *do_reldeps_ref = (for_fini ? &do_reldeps : NULL); 417 | + 418 | + for (int i = nmaps - 1; i >= 0; i--) 419 | + { 420 | + dfs_traversal (&rpo_head, maps[i], do_reldeps_ref); 421 | + 422 | + /* We can break early if all objects are already placed. */ 423 | + if (rpo_head == rpo) 424 | + goto end; 425 | + } 426 | + assert (rpo_head == rpo); 427 | + 428 | + end: 429 | + /* Here we may do a second pass of sorting, using only l_initfini[] 430 | + static dependency links. This is avoided if !FOR_FINI or if we didn't 431 | + find any reldeps in the first DFS traversal. 432 | + 433 | + The reason we do this is: while it is unspecified how circular 434 | + dependencies should be handled, the presumed reasonable behavior is to 435 | + have destructors to respect static dependency links as much as possible, 436 | + overriding reldeps if needed. And the first sorting pass, which takes 437 | + l_initfini/l_reldeps links equally, may not preserve this priority. 438 | + 439 | + Hence we do a 2nd sorting pass, taking only DT_NEEDED links into account 440 | + (see how the do_reldeps argument to dfs_traversal() is NULL below). 
*/ 441 | + if (do_reldeps) 442 | + { 443 | + for (int i = nmaps - 1; i >= 0; i--) 444 | + rpo[i]->l_visited = 0; 445 | + 446 | + struct link_map **maps_head = &maps[nmaps]; 447 | + for (int i = nmaps - 1; i >= 0; i--) 448 | + { 449 | + dfs_traversal (&maps_head, rpo[i], NULL); 450 | + 451 | + /* We can break early if all objects are already placed. 452 | + The below memcpy is not needed in the do_reldeps case here, 453 | + since we wrote back to maps[] during DFS traversal. */ 454 | + if (maps_head == maps) 455 | + return; 456 | + } 457 | + assert (maps_head == maps); 458 | + return; 459 | } 460 | + 461 | + memcpy (maps, rpo, sizeof (struct link_map *) * nmaps); 462 | +} 463 | + 464 | +void 465 | +_dl_sort_maps (struct link_map **maps, unsigned int nmaps, 466 | + unsigned int skip, bool for_fini) 467 | +{ 468 | + /* It can be tempting to use a static function pointer to store and call 469 | + the current selected sorting algorithm routine, but experimentation 470 | + shows that current processors still do not handle indirect branches 471 | + that efficiently, plus a static function pointer will involve 472 | + PTR_MANGLE/DEMANGLE, further impairing performance of small, common 473 | + input cases. A simple if-case with direct function calls appears to 474 | + be the fastest. */ 475 | + _dl_sort_maps_dfs (maps, nmaps, skip, for_fini); 476 | } 477 | diff --git a/elf/rtld.c b/elf/rtld.c 478 | index b27ced981f..44566a5d7d 100644 479 | --- a/elf/rtld.c 480 | +++ b/elf/rtld.c 481 | @@ -1126,6 +1126,9 @@ of this helper program; chances are you did not intend to run this program.\n\ 482 | main_map->l_name = (char *) ""; 483 | *user_entry = main_map->l_entry; 484 | 485 | + /* Set bit indicating this is the main program map. */ 486 | + main_map->l_main_map = 1; 487 | + 488 | #ifdef HAVE_AUX_VECTOR 489 | /* Adjust the on-stack auxiliary vector so that it looks like the 490 | binary was executed directly. 
*/ 491 | diff --git a/include/link.h b/include/link.h 492 | index eeb5f4d53b..60c024be6d 100644 493 | --- a/include/link.h 494 | +++ b/include/link.h 495 | @@ -178,6 +178,11 @@ struct link_map 496 | unsigned int l_init_called:1; /* Nonzero if DT_INIT function called. */ 497 | unsigned int l_global:1; /* Nonzero if object in _dl_global_scope. */ 498 | unsigned int l_reserved:2; /* Reserved for internal use. */ 499 | + unsigned int l_main_map:1; /* Nonzero for the map of the main program. */ 500 | + unsigned int l_visited:1; /* Used internally for map dependency 501 | + graph traversal. */ 502 | + unsigned int l_map_used:1; /* These two bits are used during traversal */ 503 | + unsigned int l_map_done:1; /* of maps in _dl_close_worker. */ 504 | unsigned int l_phdr_allocated:1; /* Nonzero if the data structure pointed 505 | to by `l_phdr' is allocated. */ 506 | unsigned int l_soname_added:1; /* Nonzero if the SONAME is for sure in 507 | diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h 508 | index b15b3aad34..f1ffd54b98 100644 509 | --- a/sysdeps/generic/ldsodefs.h 510 | +++ b/sysdeps/generic/ldsodefs.h 511 | @@ -922,7 +922,7 @@ extern void _dl_sort_maps (struct link_map **maps, unsigned int nmaps, 512 | char *used, bool for_fini) attribute_hidden; 513 | */ 514 | extern void _dl_sort_maps (struct link_map **maps, unsigned int nmaps, 515 | - char *used, bool for_fini) internal_function attribute_hidden; 516 | + unsigned int skip, bool for_fini) internal_function attribute_hidden; 517 | 518 | /* The dynamic linker calls this function before and having changing 519 | any shared object mappings. 
The `r_state' member of `struct r_debug' 520 | -- 521 | 2.39.5 522 | 523 | -------------------------------------------------------------------------------- /patches/debian/2.24/unsubmitted-mathworks-bz19329-1-of-2.v2.27.patch: -------------------------------------------------------------------------------- 1 | From: Szabolcs Nagy 2 | To: GNU C Library 3 | Cc: , Torvald Riegel 4 | Date: Wed, 30 Nov 2016 11:44:25 +0000 5 | Subject: [RFC PATCH 1/2][BZ 19329] remove broken code path for easier code review 6 | 7 | This patch is not necessary for the bug fix, just makes concurrency 8 | code review easier (removes a data race and overflow from a broken 9 | code path). 10 | 11 | dlopen can oom crash anyway in _dl_resize_dtv and it's probably 12 | better to crash than leave half setup modules around. 13 | 14 | 2016-11-30 Szabolcs Nagy 15 | 16 | * elf/dl-tls.c (_dl_add_to_slotinfo): OOM crash. 17 | 18 | --- a/elf/dl-tls.c 19 | +++ b/elf/dl-tls.c 20 | @@ -918,18 +918,10 @@ _dl_add_to_slotinfo (struct link_map *l) 21 | + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); 22 | if (listp == NULL) 23 | { 24 | - /* We ran out of memory. We will simply fail this 25 | - call but don't undo anything we did so far. The 26 | - application will crash or be terminated anyway very 27 | - soon. */ 28 | - 29 | - /* We have to do this since some entries in the dtv 30 | - slotinfo array might already point to this 31 | - generation. */ 32 | - ++GL(dl_tls_generation); 33 | - 34 | - _dl_signal_error (ENOMEM, "dlopen", NULL, N_("\ 35 | -cannot create TLS data structures")); 36 | + /* We ran out of memory in dlopen while updating tls structures. 37 | + TODO: side-effects should be rolled back and the failure should 38 | + be reported to the caller, but that's hard. 
*/ 39 | + oom (); 40 | } 41 | 42 | listp->len = TLS_SLOTINFO_SURPLUS; 43 | -------------------------------------------------------------------------------- /patches/debian/2.24/unsubmitted-mathworks-bz19329-2-of-2.v2.27.patch: -------------------------------------------------------------------------------- 1 | From: Szabolcs Nagy 2 | To: GNU C Library , Torvald Riegel 3 | Cc: 4 | Date: Wed, 30 Nov 2016 11:44:32 +0000 5 | Subject: [RFC PATCH 2/2][BZ 19329] Fix data races between pthread_create and dlopen 6 | 7 | This fixes a subset of the issues described in 8 | https://sourceware.org/ml/libc-alpha/2016-11/msg01026.html 9 | without adding locks to pthread_create. 10 | 11 | Only races between dlopen and pthread_create were considered, 12 | and the asserts got removed that tried to check for concurrency 13 | issues. 14 | 15 | The patch is incomplete because dlclose, tls access and 16 | dl_iterate_phdr related code paths are not modified. 17 | 18 | dlclose should be updated in a similar fashion to dlopen 19 | to make the patch complete alternatively pthread_create 20 | may take the GL(dl_load_write_lock) to sync with dlclose 21 | or the GL(dl_load_lock) to sync with dlopen and dlclose 22 | (that would simplify the concurrency design, but increase 23 | lock contention on the locks). 24 | 25 | 2016-11-30 Szabolcs Nagy 26 | 27 | [BZ #19329] 28 | * elf/dl-open.c (dl_open_worker): Write GL(dl_tls_generation) 29 | atomically. 30 | * elf/dl-tls.c (_dl_allocate_tls_init): Read GL(dl_tls_generation), 31 | GL(dl_tls_max_dtv_idx), slotinfo entries and listp->next atomically. 32 | Remove assertions that cannot be guaranteed. 33 | (_dl_add_to_slotinfo): Write the slotinfo entries and listp->next 34 | atomically. 35 | 36 | --- a/elf/dl-open.c 37 | +++ b/elf/dl-open.c 38 | @@ -482,9 +482,17 @@ dl_open_worker (void *a) 39 | } 40 | 41 | /* Bump the generation number if necessary. 
*/ 42 | - if (any_tls && __builtin_expect (++GL(dl_tls_generation) == 0, 0)) 43 | - _dl_fatal_printf (N_("\ 44 | + if (any_tls) 45 | + { 46 | + /* This cannot be in a data-race so non-atomic load is valid too. */ 47 | + size_t newgen = atomic_load_relaxed (&GL(dl_tls_generation)) + 1; 48 | + /* Synchronize with _dl_allocate_tls_init (see notes there) and 49 | + avoid storing an overflowed counter. */ 50 | + if (__builtin_expect (newgen == 0, 0)) 51 | + _dl_fatal_printf (N_("\ 52 | TLS generation counter wrapped! Please report this.")); 53 | + atomic_store_release (&GL(dl_tls_generation), newgen); 54 | + } 55 | 56 | /* We need a second pass for static tls data, because _dl_update_slotinfo 57 | must not be run while calls to _dl_add_to_slotinfo are still pending. */ 58 | --- a/elf/dl-tls.c 59 | +++ b/elf/dl-tls.c 60 | @@ -432,6 +432,36 @@ _dl_resize_dtv (dtv_t *dtv) 61 | } 62 | 63 | 64 | +/* 65 | +CONCURRENCY NOTES 66 | + 67 | +dlopen (and dlclose) holds the GL(dl_load_lock) while writing shared state, 68 | +which may be concurrently read by pthread_create and tls access without taking 69 | +the lock, so atomic access should be used. The shared state: 70 | + 71 | + GL(dl_tls_max_dtv_idx) - max modid assigned, (modid can be reused). 72 | + GL(dl_tls_generation) - generation count, incremented by dlopen and dlclose. 73 | + GL(dl_tls_dtv_slotinfo_list) - list of entries, contains generation count 74 | + and link_map for each module with a modid. 75 | + 76 | +A module gets a modid assigned if it has tls, a modid identifies a slotinfo 77 | +entry and it is the index of the corresponding dtv slot. The generation count 78 | +is assigned to slotinfo entries of a newly loaded or unloaded module and its 79 | +newly loaded or unloaded dependencies. 80 | + 81 | +TODO: dlclose may free memory read by a concurrent pthread_create or tls 82 | +access. 
This is broken now, so it is assumed that dlclose does not free 83 | +link_map structures while pthread_create or __tls_get_addr is reading them. 84 | + 85 | +pthread_create calls _dl_allocate_tls_init (before creating the new thread), 86 | +which should guarantee that the dtv is in a consistent state at the end: 87 | + 88 | +All slotinfo updates with generation <= dtv[0].counter are reflected in the 89 | +dtv and arbitrary later module unloads may also be reflected as unallocated 90 | +entries. (Note: a modid reuse implies a module unload and accessing tls in 91 | +an unloaded module is undefined.) 92 | +*/ 93 | + 94 | void * 95 | _dl_allocate_tls_init (void *result) 96 | { 97 | @@ -443,12 +473,24 @@ _dl_allocate_tls_init (void *result) 98 | struct dtv_slotinfo_list *listp; 99 | size_t total = 0; 100 | size_t maxgen = 0; 101 | + /* Synchronizes with the increments in dl_{open,close}_worker. 102 | + Slotinfo updates of this generation are sequenced before the 103 | + write we read from here. */ 104 | + size_t gen_count = atomic_load_acquire (&GL(dl_tls_generation)); 105 | + /* Either reads from the last write that is sequenced before the 106 | + generation counter increment we synchronized with or a write 107 | + made by a later dlopen/dlclose. dlclose may decrement this, 108 | + but only if related modules are unloaded. So it is an upper 109 | + bound on non-unloaded modids up to gen_count generation. */ 110 | + size_t dtv_slots = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx)); 111 | 112 | /* Check if the current dtv is big enough. */ 113 | - if (dtv[-1].counter < GL(dl_tls_max_dtv_idx)) 114 | + if (dtv[-1].counter < dtv_slots) 115 | { 116 | /* Resize the dtv. */ 117 | dtv = _dl_resize_dtv (dtv); 118 | + /* _dl_resize_dtv rereads GL(dl_tls_max_dtv_idx) which may decrease. */ 119 | + dtv_slots = dtv[-1].counter; 120 | 121 | /* Install this new dtv in the thread data structures. 
*/ 122 | INSTALL_DTV (result, &dtv[-1]); 123 | @@ -465,22 +507,33 @@ _dl_allocate_tls_init (void *result) 124 | for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt) 125 | { 126 | struct link_map *map; 127 | + size_t gen; 128 | void *dest; 129 | 130 | /* Check for the total number of used slots. */ 131 | - if (total + cnt > GL(dl_tls_max_dtv_idx)) 132 | + if (total + cnt > dtv_slots) 133 | break; 134 | 135 | - map = listp->slotinfo[cnt].map; 136 | + /* Synchronize with dl_add_to_slotinfo and remove_slotinfo. */ 137 | + map = atomic_load_acquire (&listp->slotinfo[cnt].map); 138 | if (map == NULL) 139 | /* Unused entry. */ 140 | continue; 141 | 142 | + /* Consistent generation count with the map read above. 143 | + Inconsistent gen may be read if the entry is being reused, 144 | + in which case it is larger than gen_count and we skip it. */ 145 | + gen = atomic_load_relaxed (&listp->slotinfo[cnt].gen); 146 | + if (gen > gen_count) 147 | + /* New entry. */ 148 | + continue; 149 | + 150 | /* Keep track of the maximum generation number. This might 151 | not be the generation counter. */ 152 | - assert (listp->slotinfo[cnt].gen <= GL(dl_tls_generation)); 153 | - maxgen = MAX (maxgen, listp->slotinfo[cnt].gen); 154 | + maxgen = MAX (maxgen, gen); 155 | 156 | + /* TODO: concurrent dlclose may free map which would break 157 | + the rest of the code below. */ 158 | dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED; 159 | dtv[map->l_tls_modid].pointer.to_free = NULL; 160 | 161 | @@ -510,11 +563,15 @@ _dl_allocate_tls_init (void *result) 162 | } 163 | 164 | total += cnt; 165 | - if (total >= GL(dl_tls_max_dtv_idx)) 166 | + if (total >= dtv_slots) 167 | break; 168 | 169 | - listp = listp->next; 170 | - assert (listp != NULL); 171 | + /* Synchronize with dl_add_to_slotinfo. */ 172 | + listp = atomic_load_acquire (&listp->next); 173 | + /* dtv_slots is an upper bound on the number of entries we care 174 | + about, the list may end sooner. 
*/ 175 | + if (listp == NULL) 176 | + break; 177 | } 178 | 179 | /* The DTV version is up-to-date now. */ 180 | @@ -913,7 +970,7 @@ _dl_add_to_slotinfo (struct link_map *l) 181 | the first slot. */ 182 | assert (idx == 0); 183 | 184 | - listp = prevp->next = (struct dtv_slotinfo_list *) 185 | + listp = (struct dtv_slotinfo_list *) 186 | malloc (sizeof (struct dtv_slotinfo_list) 187 | + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); 188 | if (listp == NULL) 189 | @@ -928,9 +985,17 @@ _dl_add_to_slotinfo (struct link_map *l) 190 | listp->next = NULL; 191 | memset (listp->slotinfo, '\0', 192 | TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); 193 | + /* Add the new list item and synchronize with _dl_allocate_tls_init. */ 194 | + atomic_store_release (&prevp->next, listp); 195 | } 196 | 197 | /* Add the information into the slotinfo data structure. */ 198 | - listp->slotinfo[idx].map = l; 199 | - listp->slotinfo[idx].gen = GL(dl_tls_generation) + 1; 200 | + 201 | + /* This cannot be in a data-race so non-atomic load would be valid too. */ 202 | + size_t newgen = atomic_load_relaxed (&GL(dl_tls_generation)) + 1; 203 | + /* TODO: Concurrent readers may see an overflowed gen, which is bad, 204 | + but overflow is guaranteed to crash the dlopen that is executing. */ 205 | + atomic_store_relaxed (&listp->slotinfo[idx].gen, newgen); 206 | + /* Synchronize with _dl_allocate_tls_init (see notes there). */ 207 | + atomic_store_release (&listp->slotinfo[idx].map, l); 208 | } 209 | -------------------------------------------------------------------------------- /patches/debian/2.24/unsubmitted-mathworks-bz19329-fixup.v2.27.patch: -------------------------------------------------------------------------------- 1 | --- a/elf/dl-tls.c 2 | +++ b/elf/dl-tls.c 3 | @@ -563,7 +563,7 @@ _dl_allocate_tls_init (void *result) 4 | } 5 | 6 | total += cnt; 7 | - if (total >= dtv_slots) 8 | + if (total > dtv_slots) 9 | break; 10 | 11 | /* Synchronize with dl_add_to_slotinfo. 
*/ 12 | -------------------------------------------------------------------------------- /patches/debian/2.27/unsubmitted-mathworks-0-bz17645.v2.31.patch: -------------------------------------------------------------------------------- 1 | **************************ORIGINAL HEADER***************************** 2 | From patchwork Thu Oct 21 13:41:22 2021 3 | Content-Type: text/plain; charset="utf-8" 4 | MIME-Version: 1.0 5 | Content-Transfer-Encoding: 7bit 6 | X-Patchwork-Submitter: Chung-Lin Tang 7 | X-Patchwork-Id: 46497 8 | Return-Path: 9 | X-Original-To: patchwork@sourceware.org 10 | Delivered-To: patchwork@sourceware.org 11 | Received: from server2.sourceware.org (localhost [IPv6:::1]) 12 | by sourceware.org (Postfix) with ESMTP id B66BB3857801 13 | for ; Thu, 21 Oct 2021 13:41:58 +0000 (GMT) 14 | X-Original-To: libc-alpha@sourceware.org 15 | Delivered-To: libc-alpha@sourceware.org 16 | Received: from seed.net.tw (sn15.seed.net.tw [139.175.54.15]) 17 | by sourceware.org (Postfix) with ESMTP id 3B6AC3858405 18 | for ; Thu, 21 Oct 2021 13:41:41 +0000 (GMT) 19 | DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 3B6AC3858405 20 | Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) 21 | header.from=codesourcery.com 22 | Authentication-Results: sourceware.org; 23 | spf=none smtp.mailfrom=codesourcery.com 24 | Received: from [112.104.15.59] (port=35230 helo=localhost.localdomain) 25 | by seed.net.tw with esmtp (Seednet 4.69:2) 26 | id 1mdYK9-000NfF-1W; Thu, 21 Oct 2021 21:41:37 +0800 27 | From: Chung-Lin Tang 28 | To: libc-alpha@sourceware.org, 29 | Adhemerval Zanella 30 | Subject: [PATCH v8 2/2] elf: Fix slow DSO sorting behavior in dynamic loader 31 | (BZ #17645) 32 | Date: Thu, 21 Oct 2021 21:41:22 +0800 33 | Message-Id: <20211021134122.3141-2-cltang@codesourcery.com> 34 | X-Mailer: git-send-email 2.17.1 35 | In-Reply-To: <20211021134122.3141-1-cltang@codesourcery.com> 36 | References: <20211021134122.3141-1-cltang@codesourcery.com> 37 | 
X-Spam-Status: No, score=-19.6 required=5.0 tests=BAYES_00, FORGED_SPF_HELO, 38 | GIT_PATCH_0, KAM_DMARC_STATUS, KAM_LAZY_DOMAIN_SECURITY, KAM_SHORT, 39 | RCVD_IN_DNSWL_LOW, SPF_HELO_PASS, SPF_NONE, 40 | TXREP autolearn=ham autolearn_force=no version=3.4.4 41 | X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on 42 | server2.sourceware.org 43 | X-BeenThere: libc-alpha@sourceware.org 44 | X-Mailman-Version: 2.1.29 45 | Precedence: list 46 | List-Id: Libc-alpha mailing list 47 | List-Unsubscribe: , 48 | 49 | List-Archive: 50 | List-Post: 51 | List-Help: 52 | List-Subscribe: , 53 | 54 | Errors-To: libc-alpha-bounces+patchwork=sourceware.org@sourceware.org 55 | Sender: "Libc-alpha" 56 | 57 | 58 | This second patch contains the actual implementation of a new sorting algorithm 59 | for shared objects in the dynamic loader, which solves the slow behavior that 60 | the current "old" algorithm falls into when the DSO set contains circular 61 | dependencies. 62 | 63 | The new algorithm implemented here is simply depth-first search (DFS) to obtain 64 | the Reverse-Post Order (RPO) sequence, a topological sort. A new l_visited:1 65 | bitfield is added to struct link_map to more elegantly facilitate such a search. 66 | 67 | The DFS algorithm is applied to the input maps[nmap-1] backwards towards 68 | maps[0]. This has the effect of a more "shallow" recursion depth in general 69 | since the input is in BFS. Also, when combined with the natural order of 70 | processing l_initfini[] at each node, this creates a resulting output sorting 71 | closer to the intuitive "left-to-right" order in most cases. 72 | 73 | Another notable implementation adjustment related to this _dl_sort_maps change 74 | is the removing of two char arrays 'used' and 'done' in _dl_close_worker to 75 | represent two per-map attributes. This has been changed to simply use two new 76 | bit-fields l_map_used:1, l_map_done:1 added to struct link_map. 
This also allows 77 | discarding the clunky 'used' array sorting that _dl_sort_maps had to sometimes 78 | do along the way. 79 | 80 | Tunable support for switching between different sorting algorithms at runtime is 81 | also added. A new tunable 'glibc.rtld.dynamic_sort' with current valid values 1 82 | (old algorithm) and 2 (new DFS algorithm) has been added. At time of commit 83 | of this patch, the default setting is 1 (old algorithm). 84 | 85 | Signed-off-by: Chung-Lin Tang 86 | Reviewed-by: Adhemerval Zanella 87 | ********************************************************************************** 88 | 89 | Mathworks 90 | Backport to glibc 2.31, remove tunable support and set default to the DFS sort map behavior 91 | 92 | Coded-by: Alan Li 93 | 94 | 95 | diff --git a/elf/dl-close.c b/elf/dl-close.c 96 | index 73b2817..cfad816 100644 97 | --- a/elf/dl-close.c 98 | +++ b/elf/dl-close.c 99 | @@ -164,8 +164,6 @@ _dl_close_worker (struct link_map *map, bool force) 100 | 101 | bool any_tls = false; 102 | const unsigned int nloaded = ns->_ns_nloaded; 103 | - char used[nloaded]; 104 | - char done[nloaded]; 105 | struct link_map *maps[nloaded]; 106 | 107 | /* Run over the list and assign indexes to the link maps and enter 108 | @@ -173,24 +171,21 @@ _dl_close_worker (struct link_map *map, bool force) 109 | int idx = 0; 110 | for (struct link_map *l = ns->_ns_loaded; l != NULL; l = l->l_next) 111 | { 112 | - l->l_idx = idx; 113 | + l->l_map_used = 0; 114 | + l->l_map_done = 0; 115 | + l->l_idx = idx; 116 | maps[idx] = l; 117 | ++idx; 118 | - 119 | } 120 | assert (idx == nloaded); 121 | 122 | - /* Prepare the bitmaps. */ 123 | - memset (used, '\0', sizeof (used)); 124 | - memset (done, '\0', sizeof (done)); 125 | - 126 | /* Keep track of the lowest index link map we have covered already. 
*/ 127 | int done_index = -1; 128 | while (++done_index < nloaded) 129 | { 130 | struct link_map *l = maps[done_index]; 131 | 132 | - if (done[done_index]) 133 | + if (l->l_map_done) 134 | /* Already handled. */ 135 | continue; 136 | 137 | @@ -201,12 +196,12 @@ _dl_close_worker (struct link_map *map, bool force) 138 | /* See CONCURRENCY NOTES in cxa_thread_atexit_impl.c to know why 139 | acquire is sufficient and correct. */ 140 | && atomic_load_acquire (&l->l_tls_dtor_count) == 0 141 | - && !used[done_index]) 142 | + && !l->l_map_used) 143 | continue; 144 | 145 | /* We need this object and we handle it now. */ 146 | - done[done_index] = 1; 147 | - used[done_index] = 1; 148 | + l->l_map_used = 1; 149 | + l->l_map_done = 1; 150 | /* Signal the object is still needed. */ 151 | l->l_idx = IDX_STILL_USED; 152 | 153 | @@ -222,9 +217,9 @@ _dl_close_worker (struct link_map *map, bool force) 154 | { 155 | assert ((*lp)->l_idx >= 0 && (*lp)->l_idx < nloaded); 156 | 157 | - if (!used[(*lp)->l_idx]) 158 | + if (!(*lp)->l_map_used) 159 | { 160 | - used[(*lp)->l_idx] = 1; 161 | + (*lp)->l_map_used = 1; 162 | /* If we marked a new object as used, and we've 163 | already processed it, then we need to go back 164 | and process again from that point forward to 165 | @@ -247,9 +242,9 @@ _dl_close_worker (struct link_map *map, bool force) 166 | { 167 | assert (jmap->l_idx >= 0 && jmap->l_idx < nloaded); 168 | 169 | - if (!used[jmap->l_idx]) 170 | + if (!jmap->l_map_used) 171 | { 172 | - used[jmap->l_idx] = 1; 173 | + jmap->l_map_used = 1; 174 | if (jmap->l_idx - 1 < done_index) 175 | done_index = jmap->l_idx - 1; 176 | } 177 | @@ -259,8 +254,7 @@ _dl_close_worker (struct link_map *map, bool force) 178 | 179 | /* Sort the entries. We can skip looking for the binary itself which is 180 | at the front of the search list for the main namespace. 
*/ 181 | - _dl_sort_maps (maps + (nsid == LM_ID_BASE), nloaded - (nsid == LM_ID_BASE), 182 | - used + (nsid == LM_ID_BASE), true); 183 | + _dl_sort_maps (maps, nloaded, (nsid == LM_ID_BASE), true); 184 | 185 | /* Call all termination functions at once. */ 186 | #ifdef SHARED 187 | @@ -277,7 +271,7 @@ _dl_close_worker (struct link_map *map, bool force) 188 | /* All elements must be in the same namespace. */ 189 | assert (imap->l_ns == nsid); 190 | 191 | - if (!used[i]) 192 | + if (!imap->l_map_used) 193 | { 194 | assert (imap->l_type == lt_loaded && !imap->l_nodelete_active); 195 | 196 | @@ -330,7 +324,7 @@ _dl_close_worker (struct link_map *map, bool force) 197 | if (i < first_loaded) 198 | first_loaded = i; 199 | } 200 | - /* Else used[i]. */ 201 | + /* Else imap->l_map_used. */ 202 | else if (imap->l_type == lt_loaded) 203 | { 204 | struct r_scope_elem *new_list = NULL; 205 | @@ -554,7 +548,7 @@ _dl_close_worker (struct link_map *map, bool force) 206 | for (unsigned int i = first_loaded; i < nloaded; ++i) 207 | { 208 | struct link_map *imap = maps[i]; 209 | - if (!used[i]) 210 | + if (!imap->l_map_used) 211 | { 212 | assert (imap->l_type == lt_loaded); 213 | 214 | diff --git a/elf/dl-deps.c b/elf/dl-deps.c 215 | index 5103a8a..33c126c 100644 216 | --- a/elf/dl-deps.c 217 | +++ b/elf/dl-deps.c 218 | @@ -591,7 +591,7 @@ Filters not supported with LD_TRACE_PRELINKING")); 219 | nlist * sizeof (struct link_map *)); 220 | /* We can skip looking for the binary itself which is at the front of 221 | the search list. */ 222 | - _dl_sort_maps (&l_initfini[1], nlist - 1, NULL, false); 223 | + _dl_sort_maps (l_initfini, nlist, 1, false); 224 | 225 | /* Terminate the list of dependencies. */ 226 | l_initfini[nlist] = NULL; 227 | diff --git a/elf/dl-fini.c b/elf/dl-fini.c 228 | index 226a6f0..afecbd1 100644 229 | --- a/elf/dl-fini.c 230 | +++ b/elf/dl-fini.c 231 | @@ -91,8 +91,7 @@ _dl_fini (void) 232 | /* Now we have to do the sorting. 
We can skip looking for the 233 | binary itself which is at the front of the search list for 234 | the main namespace. */ 235 | - _dl_sort_maps (maps + (ns == LM_ID_BASE), nmaps - (ns == LM_ID_BASE), 236 | - NULL, true); 237 | + _dl_sort_maps (maps, nmaps, (ns == LM_ID_BASE), true); 238 | 239 | /* We do not rely on the linked list of loaded object anymore 240 | from this point on. We have our own list here (maps). The 241 | diff --git a/elf/dl-open.c b/elf/dl-open.c 242 | index a5238d9..3b18c4c 100644 243 | --- a/elf/dl-open.c 244 | +++ b/elf/dl-open.c 245 | @@ -644,7 +644,7 @@ dl_open_worker (void *a) 246 | l = l->l_next; 247 | } 248 | while (l != NULL); 249 | - _dl_sort_maps (maps, nmaps, NULL, false); 250 | + _dl_sort_maps (maps, nmaps, 0, false); 251 | 252 | int relocation_in_progress = 0; 253 | 254 | diff --git a/elf/dl-sort-maps.c b/elf/dl-sort-maps.c 255 | index 86f1e23..705be5c 100644 256 | --- a/elf/dl-sort-maps.c 257 | +++ b/elf/dl-sort-maps.c 258 | @@ -16,107 +16,168 @@ 259 | License along with the GNU C Library; if not, see 260 | . */ 261 | 262 | +#include 263 | #include 264 | 265 | 266 | -/* Sort array MAPS according to dependencies of the contained objects. 267 | - Array USED, if non-NULL, is permutated along MAPS. If FOR_FINI this is 268 | - called for finishing an object. */ 269 | -void 270 | -_dl_sort_maps (struct link_map **maps, unsigned int nmaps, char *used, 271 | - bool for_fini) 272 | +/* We use a recursive function due to its better clarity and ease of 273 | + implementation, as well as faster execution speed. We already use 274 | + alloca() for list allocation during the breadth-first search of 275 | + dependencies in _dl_map_object_deps(), and this should be on the 276 | + same order of worst-case stack usage. 277 | + 278 | + Note: the '*rpo' parameter is supposed to point to one past the 279 | + last element of the array where we save the sort results, and is 280 | + decremented before storing the current map at each level. 
*/ 281 | + 282 | +static void 283 | +dfs_traversal (struct link_map ***rpo, struct link_map *map, 284 | + bool *do_reldeps) 285 | { 286 | - /* A list of one element need not be sorted. */ 287 | - if (nmaps <= 1) 288 | + if (map->l_visited) 289 | return; 290 | 291 | - unsigned int i = 0; 292 | - uint16_t seen[nmaps]; 293 | - memset (seen, 0, nmaps * sizeof (seen[0])); 294 | - while (1) 295 | - { 296 | - /* Keep track of which object we looked at this round. */ 297 | - ++seen[i]; 298 | - struct link_map *thisp = maps[i]; 299 | + map->l_visited = 1; 300 | 301 | - if (__glibc_unlikely (for_fini)) 302 | + if (map->l_initfini) 303 | + { 304 | + for (int i = 0; map->l_initfini[i] != NULL; i++) 305 | { 306 | - /* Do not handle ld.so in secondary namespaces and objects which 307 | - are not removed. */ 308 | - if (thisp != thisp->l_real || thisp->l_idx == -1) 309 | - goto skip; 310 | + struct link_map *dep = map->l_initfini[i]; 311 | + if (dep->l_visited == 0 312 | + && dep->l_main_map == 0) 313 | + dfs_traversal (rpo, dep, do_reldeps); 314 | } 315 | + } 316 | 317 | - /* Find the last object in the list for which the current one is 318 | - a dependency and move the current object behind the object 319 | - with the dependency. */ 320 | - unsigned int k = nmaps - 1; 321 | - while (k > i) 322 | + if (__glibc_unlikely (do_reldeps != NULL && map->l_reldeps != NULL)) 323 | + { 324 | + /* Indicate that we encountered relocation dependencies during 325 | + traversal. */ 326 | + *do_reldeps = true; 327 | + 328 | + for (int m = map->l_reldeps->act - 1; m >= 0; m--) 329 | { 330 | - struct link_map **runp = maps[k]->l_initfini; 331 | - if (runp != NULL) 332 | - /* Look through the dependencies of the object. */ 333 | - while (*runp != NULL) 334 | - if (__glibc_unlikely (*runp++ == thisp)) 335 | - { 336 | - move: 337 | - /* Move the current object to the back past the last 338 | - object with it as the dependency. 
*/ 339 | - memmove (&maps[i], &maps[i + 1], 340 | - (k - i) * sizeof (maps[0])); 341 | - maps[k] = thisp; 342 | - 343 | - if (used != NULL) 344 | - { 345 | - char here_used = used[i]; 346 | - memmove (&used[i], &used[i + 1], 347 | - (k - i) * sizeof (used[0])); 348 | - used[k] = here_used; 349 | - } 350 | - 351 | - if (seen[i + 1] > nmaps - i) 352 | - { 353 | - ++i; 354 | - goto next_clear; 355 | - } 356 | - 357 | - uint16_t this_seen = seen[i]; 358 | - memmove (&seen[i], &seen[i + 1], (k - i) * sizeof (seen[0])); 359 | - seen[k] = this_seen; 360 | - 361 | - goto next; 362 | - } 363 | - 364 | - if (__glibc_unlikely (for_fini && maps[k]->l_reldeps != NULL)) 365 | - { 366 | - unsigned int m = maps[k]->l_reldeps->act; 367 | - struct link_map **relmaps = &maps[k]->l_reldeps->list[0]; 368 | - 369 | - /* Look through the relocation dependencies of the object. */ 370 | - while (m-- > 0) 371 | - if (__glibc_unlikely (relmaps[m] == thisp)) 372 | - { 373 | - /* If a cycle exists with a link time dependency, 374 | - preserve the latter. */ 375 | - struct link_map **runp = thisp->l_initfini; 376 | - if (runp != NULL) 377 | - while (*runp != NULL) 378 | - if (__glibc_unlikely (*runp++ == maps[k])) 379 | - goto ignore; 380 | - goto move; 381 | - } 382 | - ignore:; 383 | - } 384 | - 385 | - --k; 386 | + struct link_map *dep = map->l_reldeps->list[m]; 387 | + if (dep->l_visited == 0 388 | + && dep->l_main_map == 0) 389 | + dfs_traversal (rpo, dep, do_reldeps); 390 | } 391 | + } 392 | + 393 | + *rpo -= 1; 394 | + **rpo = map; 395 | +} 396 | 397 | - skip: 398 | - if (++i == nmaps) 399 | - break; 400 | - next_clear: 401 | - memset (&seen[i], 0, (nmaps - i) * sizeof (seen[0])); 402 | +/* Topologically sort array MAPS according to dependencies of the contained 403 | + objects. 
*/ 404 | 405 | - next:; 406 | +static void 407 | +_dl_sort_maps_dfs (struct link_map **maps, unsigned int nmaps, 408 | + unsigned int skip __attribute__ ((unused)), bool for_fini) 409 | +{ 410 | + for (int i = nmaps - 1; i >= 0; i--) 411 | + maps[i]->l_visited = 0; 412 | + 413 | + /* We apply DFS traversal for each of maps[i] until the whole total order 414 | + is found and we're at the start of the Reverse-Postorder (RPO) sequence, 415 | + which is a topological sort. 416 | + 417 | + We go from maps[nmaps - 1] backwards towards maps[0] at this level. 418 | + Due to the breadth-first search (BFS) ordering we receive, going 419 | + backwards usually gives a more shallow depth-first recursion depth, 420 | + adding more stack usage safety. Also, combined with the natural 421 | + processing order of l_initfini[] at each node during DFS, this maintains 422 | + an ordering closer to the original link ordering in the sorting results 423 | + under most simpler cases. 424 | + 425 | + Another reason we order the top level backwards, it that maps[0] is 426 | + usually exactly the main object of which we're in the midst of 427 | + _dl_map_object_deps() processing, and maps[0]->l_initfini[] is still 428 | + blank. If we start the traversal from maps[0], since having no 429 | + dependencies yet filled in, maps[0] will always be immediately 430 | + incorrectly placed at the last place in the order (first in reverse). 431 | + Adjusting the order so that maps[0] is last traversed naturally avoids 432 | + this problem. 433 | + 434 | + Further, the old "optimization" of skipping the main object at maps[0] 435 | + from the call-site (i.e. _dl_sort_maps(maps+1,nmaps-1)) is in general 436 | + no longer valid, since traversing along object dependency-links 437 | + may "find" the main object even when it is not included in the initial 438 | + order (e.g. a dlopen()'ed shared object can have circular dependencies 439 | + linked back to itself). 
In such a case, traversing N-1 objects will 440 | + create a N-object result, and raise problems. 441 | + 442 | + To summarize, just passing in the full list, and iterating from back 443 | + to front makes things much more straightforward. */ 444 | + 445 | + /* Array to hold RPO sorting results, before we copy back to maps[]. */ 446 | + struct link_map *rpo[nmaps]; 447 | + 448 | + /* The 'head' position during each DFS iteration. Note that we start at 449 | + one past the last element due to first-decrement-then-store (see the 450 | + bottom of above dfs_traversal() routine). */ 451 | + struct link_map **rpo_head = &rpo[nmaps]; 452 | + 453 | + bool do_reldeps = false; 454 | + bool *do_reldeps_ref = (for_fini ? &do_reldeps : NULL); 455 | + 456 | + for (int i = nmaps - 1; i >= 0; i--) 457 | + { 458 | + dfs_traversal (&rpo_head, maps[i], do_reldeps_ref); 459 | + 460 | + /* We can break early if all objects are already placed. */ 461 | + if (rpo_head == rpo) 462 | + goto end; 463 | + } 464 | + assert (rpo_head == rpo); 465 | + 466 | + end: 467 | + /* Here we may do a second pass of sorting, using only l_initfini[] 468 | + static dependency links. This is avoided if !FOR_FINI or if we didn't 469 | + find any reldeps in the first DFS traversal. 470 | + 471 | + The reason we do this is: while it is unspecified how circular 472 | + dependencies should be handled, the presumed reasonable behavior is to 473 | + have destructors to respect static dependency links as much as possible, 474 | + overriding reldeps if needed. And the first sorting pass, which takes 475 | + l_initfini/l_reldeps links equally, may not preserve this priority. 476 | + 477 | + Hence we do a 2nd sorting pass, taking only DT_NEEDED links into account 478 | + (see how the do_reldeps argument to dfs_traversal() is NULL below). 
*/ 479 | + if (do_reldeps) 480 | + { 481 | + for (int i = nmaps - 1; i >= 0; i--) 482 | + rpo[i]->l_visited = 0; 483 | + 484 | + struct link_map **maps_head = &maps[nmaps]; 485 | + for (int i = nmaps - 1; i >= 0; i--) 486 | + { 487 | + dfs_traversal (&maps_head, rpo[i], NULL); 488 | + 489 | + /* We can break early if all objects are already placed. 490 | + The below memcpy is not needed in the do_reldeps case here, 491 | + since we wrote back to maps[] during DFS traversal. */ 492 | + if (maps_head == maps) 493 | + return; 494 | + } 495 | + assert (maps_head == maps); 496 | + return; 497 | } 498 | + 499 | + memcpy (maps, rpo, sizeof (struct link_map *) * nmaps); 500 | +} 501 | + 502 | +void 503 | +_dl_sort_maps (struct link_map **maps, unsigned int nmaps, 504 | + unsigned int skip, bool for_fini) 505 | +{ 506 | + /* It can be tempting to use a static function pointer to store and call 507 | + the current selected sorting algorithm routine, but experimentation 508 | + shows that current processors still do not handle indirect branches 509 | + that efficiently, plus a static function pointer will involve 510 | + PTR_MANGLE/DEMANGLE, further impairing performance of small, common 511 | + input cases. A simple if-case with direct function calls appears to 512 | + be the fastest. */ 513 | + _dl_sort_maps_dfs (maps, nmaps, skip, for_fini); 514 | } 515 | diff --git a/elf/rtld.c b/elf/rtld.c 516 | index e0752eb..e1d056b 100644 517 | --- a/elf/rtld.c 518 | +++ b/elf/rtld.c 519 | @@ -1340,6 +1340,9 @@ of this helper program; chances are you did not intend to run this program.\n\ 520 | main_map->l_name = (char *) ""; 521 | *user_entry = main_map->l_entry; 522 | 523 | + /* Set bit indicating this is the main program map. */ 524 | + main_map->l_main_map = 1; 525 | + 526 | #ifdef HAVE_AUX_VECTOR 527 | /* Adjust the on-stack auxiliary vector so that it looks like the 528 | binary was executed directly. 
*/ 529 | diff --git a/include/link.h b/include/link.h 530 | index aea2684..3a87694 100644 531 | --- a/include/link.h 532 | +++ b/include/link.h 533 | @@ -177,6 +177,11 @@ struct link_map 534 | unsigned int l_init_called:1; /* Nonzero if DT_INIT function called. */ 535 | unsigned int l_global:1; /* Nonzero if object in _dl_global_scope. */ 536 | unsigned int l_reserved:2; /* Reserved for internal use. */ 537 | + unsigned int l_main_map:1; /* Nonzero for the map of the main program. */ 538 | + unsigned int l_visited:1; /* Used internally for map dependency 539 | + graph traversal. */ 540 | + unsigned int l_map_used:1; /* These two bits are used during traversal */ 541 | + unsigned int l_map_done:1; /* of maps in _dl_close_worker. */ 542 | unsigned int l_phdr_allocated:1; /* Nonzero if the data structure pointed 543 | to by `l_phdr' is allocated. */ 544 | unsigned int l_soname_added:1; /* Nonzero if the SONAME is for sure in 545 | diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h 546 | index d7e1515..5bed748 100644 547 | --- a/sysdeps/generic/ldsodefs.h 548 | +++ b/sysdeps/generic/ldsodefs.h 549 | @@ -1010,7 +1010,7 @@ extern void _dl_fini (void) attribute_hidden; 550 | 551 | /* Sort array MAPS according to dependencies of the contained objects. */ 552 | extern void _dl_sort_maps (struct link_map **maps, unsigned int nmaps, 553 | - char *used, bool for_fini) attribute_hidden; 554 | + unsigned int skip, bool for_fini) attribute_hidden; 555 | 556 | /* The dynamic linker calls this function before and having changing 557 | any shared object mappings. 
The `r_state' member of `struct r_debug' 558 | -------------------------------------------------------------------------------- /patches/debian/2.27/unsubmitted-mathworks-bz19329-1-of-2.v2.27.patch: -------------------------------------------------------------------------------- 1 | From: Szabolcs Nagy 2 | To: GNU C Library 3 | Cc: , Torvald Riegel 4 | Date: Wed, 30 Nov 2016 11:44:25 +0000 5 | Subject: [RFC PATCH 1/2][BZ 19329] remove broken code path for easier code review 6 | 7 | This patch is not necessary for the bug fix, just makes concurrency 8 | code review easier (removes a data race and overflow from a broken 9 | code path). 10 | 11 | dlopen can oom crash anyway in _dl_resize_dtv and it's probably 12 | better to crash than leave half setup modules around. 13 | 14 | 2016-11-30 Szabolcs Nagy 15 | 16 | * elf/dl-tls.c (_dl_add_to_slotinfo): OOM crash. 17 | 18 | --- a/elf/dl-tls.c 19 | +++ b/elf/dl-tls.c 20 | @@ -918,18 +918,10 @@ _dl_add_to_slotinfo (struct link_map *l) 21 | + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); 22 | if (listp == NULL) 23 | { 24 | - /* We ran out of memory. We will simply fail this 25 | - call but don't undo anything we did so far. The 26 | - application will crash or be terminated anyway very 27 | - soon. */ 28 | - 29 | - /* We have to do this since some entries in the dtv 30 | - slotinfo array might already point to this 31 | - generation. */ 32 | - ++GL(dl_tls_generation); 33 | - 34 | - _dl_signal_error (ENOMEM, "dlopen", NULL, N_("\ 35 | -cannot create TLS data structures")); 36 | + /* We ran out of memory in dlopen while updating tls structures. 37 | + TODO: side-effects should be rolled back and the failure should 38 | + be reported to the caller, but that's hard. 
*/ 39 | + oom (); 40 | } 41 | 42 | listp->len = TLS_SLOTINFO_SURPLUS; 43 | -------------------------------------------------------------------------------- /patches/debian/2.27/unsubmitted-mathworks-bz19329-2-of-2.v2.27.patch: -------------------------------------------------------------------------------- 1 | From: Szabolcs Nagy 2 | To: GNU C Library , Torvald Riegel 3 | Cc: 4 | Date: Wed, 30 Nov 2016 11:44:32 +0000 5 | Subject: [RFC PATCH 2/2][BZ 19329] Fix data races between pthread_create and dlopen 6 | 7 | This fixes a subset of the issues described in 8 | https://sourceware.org/ml/libc-alpha/2016-11/msg01026.html 9 | without adding locks to pthread_create. 10 | 11 | Only races between dlopen and pthread_create were considered, 12 | and the asserts got removed that tried to check for concurrency 13 | issues. 14 | 15 | The patch is incomplete because dlclose, tls access and 16 | dl_iterate_phdr related code paths are not modified. 17 | 18 | dlclose should be updated in a similar fashion to dlopen 19 | to make the patch complete alternatively pthread_create 20 | may take the GL(dl_load_write_lock) to sync with dlclose 21 | or the GL(dl_load_lock) to sync with dlopen and dlclose 22 | (that would simplify the concurrency design, but increase 23 | lock contention on the locks). 24 | 25 | 2016-11-30 Szabolcs Nagy 26 | 27 | [BZ #19329] 28 | * elf/dl-open.c (dl_open_worker): Write GL(dl_tls_generation) 29 | atomically. 30 | * elf/dl-tls.c (_dl_allocate_tls_init): Read GL(dl_tls_generation), 31 | GL(dl_tls_max_dtv_idx), slotinfo entries and listp->next atomically. 32 | Remove assertions that cannot be guaranteed. 33 | (_dl_add_to_slotinfo): Write the slotinfo entries and listp->next 34 | atomically. 35 | 36 | --- a/elf/dl-open.c 37 | +++ b/elf/dl-open.c 38 | @@ -482,9 +482,17 @@ dl_open_worker (void *a) 39 | } 40 | 41 | /* Bump the generation number if necessary. 
*/ 42 | - if (any_tls && __builtin_expect (++GL(dl_tls_generation) == 0, 0)) 43 | - _dl_fatal_printf (N_("\ 44 | + if (any_tls) 45 | + { 46 | + /* This cannot be in a data-race so non-atomic load is valid too. */ 47 | + size_t newgen = atomic_load_relaxed (&GL(dl_tls_generation)) + 1; 48 | + /* Synchronize with _dl_allocate_tls_init (see notes there) and 49 | + avoid storing an overflowed counter. */ 50 | + if (__builtin_expect (newgen == 0, 0)) 51 | + _dl_fatal_printf (N_("\ 52 | TLS generation counter wrapped! Please report this.")); 53 | + atomic_store_release (&GL(dl_tls_generation), newgen); 54 | + } 55 | 56 | /* We need a second pass for static tls data, because _dl_update_slotinfo 57 | must not be run while calls to _dl_add_to_slotinfo are still pending. */ 58 | --- a/elf/dl-tls.c 59 | +++ b/elf/dl-tls.c 60 | @@ -432,6 +432,36 @@ _dl_resize_dtv (dtv_t *dtv) 61 | } 62 | 63 | 64 | +/* 65 | +CONCURRENCY NOTES 66 | + 67 | +dlopen (and dlclose) holds the GL(dl_load_lock) while writing shared state, 68 | +which may be concurrently read by pthread_create and tls access without taking 69 | +the lock, so atomic access should be used. The shared state: 70 | + 71 | + GL(dl_tls_max_dtv_idx) - max modid assigned, (modid can be reused). 72 | + GL(dl_tls_generation) - generation count, incremented by dlopen and dlclose. 73 | + GL(dl_tls_dtv_slotinfo_list) - list of entries, contains generation count 74 | + and link_map for each module with a modid. 75 | + 76 | +A module gets a modid assigned if it has tls, a modid identifies a slotinfo 77 | +entry and it is the index of the corresponding dtv slot. The generation count 78 | +is assigned to slotinfo entries of a newly loaded or unloaded module and its 79 | +newly loaded or unloaded dependencies. 80 | + 81 | +TODO: dlclose may free memory read by a concurrent pthread_create or tls 82 | +access. 
This is broken now, so it is assumed that dlclose does not free 83 | +link_map structures while pthread_create or __tls_get_addr is reading them. 84 | + 85 | +pthread_create calls _dl_allocate_tls_init (before creating the new thread), 86 | +which should guarantee that the dtv is in a consistent state at the end: 87 | + 88 | +All slotinfo updates with generation <= dtv[0].counter are reflected in the 89 | +dtv and arbitrary later module unloads may also be reflected as unallocated 90 | +entries. (Note: a modid reuse implies a module unload and accessing tls in 91 | +an unloaded module is undefined.) 92 | +*/ 93 | + 94 | void * 95 | _dl_allocate_tls_init (void *result) 96 | { 97 | @@ -443,12 +473,24 @@ _dl_allocate_tls_init (void *result) 98 | struct dtv_slotinfo_list *listp; 99 | size_t total = 0; 100 | size_t maxgen = 0; 101 | + /* Synchronizes with the increments in dl_{open,close}_worker. 102 | + Slotinfo updates of this generation are sequenced before the 103 | + write we read from here. */ 104 | + size_t gen_count = atomic_load_acquire (&GL(dl_tls_generation)); 105 | + /* Either reads from the last write that is sequenced before the 106 | + generation counter increment we synchronized with or a write 107 | + made by a later dlopen/dlclose. dlclose may decrement this, 108 | + but only if related modules are unloaded. So it is an upper 109 | + bound on non-unloaded modids up to gen_count generation. */ 110 | + size_t dtv_slots = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx)); 111 | 112 | /* Check if the current dtv is big enough. */ 113 | - if (dtv[-1].counter < GL(dl_tls_max_dtv_idx)) 114 | + if (dtv[-1].counter < dtv_slots) 115 | { 116 | /* Resize the dtv. */ 117 | dtv = _dl_resize_dtv (dtv); 118 | + /* _dl_resize_dtv rereads GL(dl_tls_max_dtv_idx) which may decrease. */ 119 | + dtv_slots = dtv[-1].counter; 120 | 121 | /* Install this new dtv in the thread data structures. 
*/ 122 | INSTALL_DTV (result, &dtv[-1]); 123 | @@ -465,22 +507,33 @@ _dl_allocate_tls_init (void *result) 124 | for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt) 125 | { 126 | struct link_map *map; 127 | + size_t gen; 128 | void *dest; 129 | 130 | /* Check for the total number of used slots. */ 131 | - if (total + cnt > GL(dl_tls_max_dtv_idx)) 132 | + if (total + cnt > dtv_slots) 133 | break; 134 | 135 | - map = listp->slotinfo[cnt].map; 136 | + /* Synchronize with dl_add_to_slotinfo and remove_slotinfo. */ 137 | + map = atomic_load_acquire (&listp->slotinfo[cnt].map); 138 | if (map == NULL) 139 | /* Unused entry. */ 140 | continue; 141 | 142 | + /* Consistent generation count with the map read above. 143 | + Inconsistent gen may be read if the entry is being reused, 144 | + in which case it is larger than gen_count and we skip it. */ 145 | + gen = atomic_load_relaxed (&listp->slotinfo[cnt].gen); 146 | + if (gen > gen_count) 147 | + /* New entry. */ 148 | + continue; 149 | + 150 | /* Keep track of the maximum generation number. This might 151 | not be the generation counter. */ 152 | - assert (listp->slotinfo[cnt].gen <= GL(dl_tls_generation)); 153 | - maxgen = MAX (maxgen, listp->slotinfo[cnt].gen); 154 | + maxgen = MAX (maxgen, gen); 155 | 156 | + /* TODO: concurrent dlclose may free map which would break 157 | + the rest of the code below. */ 158 | dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED; 159 | dtv[map->l_tls_modid].pointer.to_free = NULL; 160 | 161 | @@ -510,11 +563,15 @@ _dl_allocate_tls_init (void *result) 162 | } 163 | 164 | total += cnt; 165 | - if (total >= GL(dl_tls_max_dtv_idx)) 166 | + if (total >= dtv_slots) 167 | break; 168 | 169 | - listp = listp->next; 170 | - assert (listp != NULL); 171 | + /* Synchronize with dl_add_to_slotinfo. */ 172 | + listp = atomic_load_acquire (&listp->next); 173 | + /* dtv_slots is an upper bound on the number of entries we care 174 | + about, the list may end sooner. 
*/ 175 | + if (listp == NULL) 176 | + break; 177 | } 178 | 179 | /* The DTV version is up-to-date now. */ 180 | @@ -913,7 +970,7 @@ _dl_add_to_slotinfo (struct link_map *l) 181 | the first slot. */ 182 | assert (idx == 0); 183 | 184 | - listp = prevp->next = (struct dtv_slotinfo_list *) 185 | + listp = (struct dtv_slotinfo_list *) 186 | malloc (sizeof (struct dtv_slotinfo_list) 187 | + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); 188 | if (listp == NULL) 189 | @@ -928,9 +985,17 @@ _dl_add_to_slotinfo (struct link_map *l) 190 | listp->next = NULL; 191 | memset (listp->slotinfo, '\0', 192 | TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); 193 | + /* Add the new list item and synchronize with _dl_allocate_tls_init. */ 194 | + atomic_store_release (&prevp->next, listp); 195 | } 196 | 197 | /* Add the information into the slotinfo data structure. */ 198 | - listp->slotinfo[idx].map = l; 199 | - listp->slotinfo[idx].gen = GL(dl_tls_generation) + 1; 200 | + 201 | + /* This cannot be in a data-race so non-atomic load would be valid too. */ 202 | + size_t newgen = atomic_load_relaxed (&GL(dl_tls_generation)) + 1; 203 | + /* TODO: Concurrent readers may see an overflowed gen, which is bad, 204 | + but overflow is guaranteed to crash the dlopen that is executing. */ 205 | + atomic_store_relaxed (&listp->slotinfo[idx].gen, newgen); 206 | + /* Synchronize with _dl_allocate_tls_init (see notes there). */ 207 | + atomic_store_release (&listp->slotinfo[idx].map, l); 208 | } 209 | -------------------------------------------------------------------------------- /patches/debian/2.27/unsubmitted-mathworks-bz19329-fixup.v2.27.patch: -------------------------------------------------------------------------------- 1 | --- a/elf/dl-tls.c 2 | +++ b/elf/dl-tls.c 3 | @@ -563,7 +563,7 @@ _dl_allocate_tls_init (void *result) 4 | } 5 | 6 | total += cnt; 7 | - if (total >= dtv_slots) 8 | + if (total > dtv_slots) 9 | break; 10 | 11 | /* Synchronize with dl_add_to_slotinfo. 
*/ 12 | -------------------------------------------------------------------------------- /patches/debian/2.28: -------------------------------------------------------------------------------- 1 | 2.27 -------------------------------------------------------------------------------- /patches/debian/2.31/unsubmitted-mathworks-0-bz17645.v2.31.patch: -------------------------------------------------------------------------------- 1 | **************************ORIGINAL HEADER***************************** 2 | From patchwork Thu Oct 21 13:41:22 2021 3 | Content-Type: text/plain; charset="utf-8" 4 | MIME-Version: 1.0 5 | Content-Transfer-Encoding: 7bit 6 | X-Patchwork-Submitter: Chung-Lin Tang 7 | X-Patchwork-Id: 46497 8 | Return-Path: 9 | X-Original-To: patchwork@sourceware.org 10 | Delivered-To: patchwork@sourceware.org 11 | Received: from server2.sourceware.org (localhost [IPv6:::1]) 12 | by sourceware.org (Postfix) with ESMTP id B66BB3857801 13 | for ; Thu, 21 Oct 2021 13:41:58 +0000 (GMT) 14 | X-Original-To: libc-alpha@sourceware.org 15 | Delivered-To: libc-alpha@sourceware.org 16 | Received: from seed.net.tw (sn15.seed.net.tw [139.175.54.15]) 17 | by sourceware.org (Postfix) with ESMTP id 3B6AC3858405 18 | for ; Thu, 21 Oct 2021 13:41:41 +0000 (GMT) 19 | DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 3B6AC3858405 20 | Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) 21 | header.from=codesourcery.com 22 | Authentication-Results: sourceware.org; 23 | spf=none smtp.mailfrom=codesourcery.com 24 | Received: from [112.104.15.59] (port=35230 helo=localhost.localdomain) 25 | by seed.net.tw with esmtp (Seednet 4.69:2) 26 | id 1mdYK9-000NfF-1W; Thu, 21 Oct 2021 21:41:37 +0800 27 | From: Chung-Lin Tang 28 | To: libc-alpha@sourceware.org, 29 | Adhemerval Zanella 30 | Subject: [PATCH v8 2/2] elf: Fix slow DSO sorting behavior in dynamic loader 31 | (BZ #17645) 32 | Date: Thu, 21 Oct 2021 21:41:22 +0800 33 | Message-Id: 
<20211021134122.3141-2-cltang@codesourcery.com> 34 | X-Mailer: git-send-email 2.17.1 35 | In-Reply-To: <20211021134122.3141-1-cltang@codesourcery.com> 36 | References: <20211021134122.3141-1-cltang@codesourcery.com> 37 | X-Spam-Status: No, score=-19.6 required=5.0 tests=BAYES_00, FORGED_SPF_HELO, 38 | GIT_PATCH_0, KAM_DMARC_STATUS, KAM_LAZY_DOMAIN_SECURITY, KAM_SHORT, 39 | RCVD_IN_DNSWL_LOW, SPF_HELO_PASS, SPF_NONE, 40 | TXREP autolearn=ham autolearn_force=no version=3.4.4 41 | X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on 42 | server2.sourceware.org 43 | X-BeenThere: libc-alpha@sourceware.org 44 | X-Mailman-Version: 2.1.29 45 | Precedence: list 46 | List-Id: Libc-alpha mailing list 47 | List-Unsubscribe: , 48 | 49 | List-Archive: 50 | List-Post: 51 | List-Help: 52 | List-Subscribe: , 53 | 54 | Errors-To: libc-alpha-bounces+patchwork=sourceware.org@sourceware.org 55 | Sender: "Libc-alpha" 56 | 57 | 58 | This second patch contains the actual implementation of a new sorting algorithm 59 | for shared objects in the dynamic loader, which solves the slow behavior that 60 | the current "old" algorithm falls into when the DSO set contains circular 61 | dependencies. 62 | 63 | The new algorithm implemented here is simply depth-first search (DFS) to obtain 64 | the Reverse-Post Order (RPO) sequence, a topological sort. A new l_visited:1 65 | bitfield is added to struct link_map to more elegantly facilitate such a search. 66 | 67 | The DFS algorithm is applied to the input maps[nmap-1] backwards towards 68 | maps[0]. This has the effect of a more "shallow" recursion depth in general 69 | since the input is in BFS. Also, when combined with the natural order of 70 | processing l_initfini[] at each node, this creates a resulting output sorting 71 | closer to the intuitive "left-to-right" order in most cases. 
72 | 73 | Another notable implementation adjustment related to this _dl_sort_maps change 74 | is the removing of two char arrays 'used' and 'done' in _dl_close_worker to 75 | represent two per-map attributes. This has been changed to simply use two new 76 | bit-fields l_map_used:1, l_map_done:1 added to struct link_map. This also allows 77 | discarding the clunky 'used' array sorting that _dl_sort_maps had to sometimes 78 | do along the way. 79 | 80 | Tunable support for switching between different sorting algorithms at runtime is 81 | also added. A new tunable 'glibc.rtld.dynamic_sort' with current valid values 1 82 | (old algorithm) and 2 (new DFS algorithm) has been added. At time of commit 83 | of this patch, the default setting is 1 (old algorithm). 84 | 85 | Signed-off-by: Chung-Lin Tang 86 | Reviewed-by: Adhemerval Zanella 87 | ********************************************************************************** 88 | 89 | Mathworks 90 | Backport to glibc 2.31, remove tunable support and set default to the DFS sort map behavior 91 | 92 | Coded-by: Alan Li 93 | 94 | 95 | diff --git a/elf/dl-close.c b/elf/dl-close.c 96 | index 73b2817..cfad816 100644 97 | --- a/elf/dl-close.c 98 | +++ b/elf/dl-close.c 99 | @@ -164,8 +164,6 @@ _dl_close_worker (struct link_map *map, bool force) 100 | 101 | bool any_tls = false; 102 | const unsigned int nloaded = ns->_ns_nloaded; 103 | - char used[nloaded]; 104 | - char done[nloaded]; 105 | struct link_map *maps[nloaded]; 106 | 107 | /* Run over the list and assign indexes to the link maps and enter 108 | @@ -173,24 +171,21 @@ _dl_close_worker (struct link_map *map, bool force) 109 | int idx = 0; 110 | for (struct link_map *l = ns->_ns_loaded; l != NULL; l = l->l_next) 111 | { 112 | - l->l_idx = idx; 113 | + l->l_map_used = 0; 114 | + l->l_map_done = 0; 115 | + l->l_idx = idx; 116 | maps[idx] = l; 117 | ++idx; 118 | - 119 | } 120 | assert (idx == nloaded); 121 | 122 | - /* Prepare the bitmaps. 
*/ 123 | - memset (used, '\0', sizeof (used)); 124 | - memset (done, '\0', sizeof (done)); 125 | - 126 | /* Keep track of the lowest index link map we have covered already. */ 127 | int done_index = -1; 128 | while (++done_index < nloaded) 129 | { 130 | struct link_map *l = maps[done_index]; 131 | 132 | - if (done[done_index]) 133 | + if (l->l_map_done) 134 | /* Already handled. */ 135 | continue; 136 | 137 | @@ -201,12 +196,12 @@ _dl_close_worker (struct link_map *map, bool force) 138 | /* See CONCURRENCY NOTES in cxa_thread_atexit_impl.c to know why 139 | acquire is sufficient and correct. */ 140 | && atomic_load_acquire (&l->l_tls_dtor_count) == 0 141 | - && !used[done_index]) 142 | + && !l->l_map_used) 143 | continue; 144 | 145 | /* We need this object and we handle it now. */ 146 | - done[done_index] = 1; 147 | - used[done_index] = 1; 148 | + l->l_map_used = 1; 149 | + l->l_map_done = 1; 150 | /* Signal the object is still needed. */ 151 | l->l_idx = IDX_STILL_USED; 152 | 153 | @@ -222,9 +217,9 @@ _dl_close_worker (struct link_map *map, bool force) 154 | { 155 | assert ((*lp)->l_idx >= 0 && (*lp)->l_idx < nloaded); 156 | 157 | - if (!used[(*lp)->l_idx]) 158 | + if (!(*lp)->l_map_used) 159 | { 160 | - used[(*lp)->l_idx] = 1; 161 | + (*lp)->l_map_used = 1; 162 | /* If we marked a new object as used, and we've 163 | already processed it, then we need to go back 164 | and process again from that point forward to 165 | @@ -247,9 +242,9 @@ _dl_close_worker (struct link_map *map, bool force) 166 | { 167 | assert (jmap->l_idx >= 0 && jmap->l_idx < nloaded); 168 | 169 | - if (!used[jmap->l_idx]) 170 | + if (!jmap->l_map_used) 171 | { 172 | - used[jmap->l_idx] = 1; 173 | + jmap->l_map_used = 1; 174 | if (jmap->l_idx - 1 < done_index) 175 | done_index = jmap->l_idx - 1; 176 | } 177 | @@ -259,8 +254,7 @@ _dl_close_worker (struct link_map *map, bool force) 178 | 179 | /* Sort the entries. 
We can skip looking for the binary itself which is 180 | at the front of the search list for the main namespace. */ 181 | - _dl_sort_maps (maps + (nsid == LM_ID_BASE), nloaded - (nsid == LM_ID_BASE), 182 | - used + (nsid == LM_ID_BASE), true); 183 | + _dl_sort_maps (maps, nloaded, (nsid == LM_ID_BASE), true); 184 | 185 | /* Call all termination functions at once. */ 186 | #ifdef SHARED 187 | @@ -277,7 +271,7 @@ _dl_close_worker (struct link_map *map, bool force) 188 | /* All elements must be in the same namespace. */ 189 | assert (imap->l_ns == nsid); 190 | 191 | - if (!used[i]) 192 | + if (!imap->l_map_used) 193 | { 194 | assert (imap->l_type == lt_loaded && !imap->l_nodelete_active); 195 | 196 | @@ -330,7 +324,7 @@ _dl_close_worker (struct link_map *map, bool force) 197 | if (i < first_loaded) 198 | first_loaded = i; 199 | } 200 | - /* Else used[i]. */ 201 | + /* Else imap->l_map_used. */ 202 | else if (imap->l_type == lt_loaded) 203 | { 204 | struct r_scope_elem *new_list = NULL; 205 | @@ -554,7 +548,7 @@ _dl_close_worker (struct link_map *map, bool force) 206 | for (unsigned int i = first_loaded; i < nloaded; ++i) 207 | { 208 | struct link_map *imap = maps[i]; 209 | - if (!used[i]) 210 | + if (!imap->l_map_used) 211 | { 212 | assert (imap->l_type == lt_loaded); 213 | 214 | diff --git a/elf/dl-deps.c b/elf/dl-deps.c 215 | index 5103a8a..33c126c 100644 216 | --- a/elf/dl-deps.c 217 | +++ b/elf/dl-deps.c 218 | @@ -591,7 +591,7 @@ Filters not supported with LD_TRACE_PRELINKING")); 219 | nlist * sizeof (struct link_map *)); 220 | /* We can skip looking for the binary itself which is at the front of 221 | the search list. */ 222 | - _dl_sort_maps (&l_initfini[1], nlist - 1, NULL, false); 223 | + _dl_sort_maps (l_initfini, nlist, 1, false); 224 | 225 | /* Terminate the list of dependencies. 
*/ 226 | l_initfini[nlist] = NULL; 227 | diff --git a/elf/dl-fini.c b/elf/dl-fini.c 228 | index 226a6f0..afecbd1 100644 229 | --- a/elf/dl-fini.c 230 | +++ b/elf/dl-fini.c 231 | @@ -91,8 +91,7 @@ _dl_fini (void) 232 | /* Now we have to do the sorting. We can skip looking for the 233 | binary itself which is at the front of the search list for 234 | the main namespace. */ 235 | - _dl_sort_maps (maps + (ns == LM_ID_BASE), nmaps - (ns == LM_ID_BASE), 236 | - NULL, true); 237 | + _dl_sort_maps (maps, nmaps, (ns == LM_ID_BASE), true); 238 | 239 | /* We do not rely on the linked list of loaded object anymore 240 | from this point on. We have our own list here (maps). The 241 | diff --git a/elf/dl-open.c b/elf/dl-open.c 242 | index a5238d9..3b18c4c 100644 243 | --- a/elf/dl-open.c 244 | +++ b/elf/dl-open.c 245 | @@ -644,7 +644,7 @@ dl_open_worker (void *a) 246 | l = l->l_next; 247 | } 248 | while (l != NULL); 249 | - _dl_sort_maps (maps, nmaps, NULL, false); 250 | + _dl_sort_maps (maps, nmaps, 0, false); 251 | 252 | int relocation_in_progress = 0; 253 | 254 | diff --git a/elf/dl-sort-maps.c b/elf/dl-sort-maps.c 255 | index 86f1e23..705be5c 100644 256 | --- a/elf/dl-sort-maps.c 257 | +++ b/elf/dl-sort-maps.c 258 | @@ -16,107 +16,168 @@ 259 | License along with the GNU C Library; if not, see 260 | . */ 261 | 262 | +#include 263 | #include 264 | 265 | 266 | -/* Sort array MAPS according to dependencies of the contained objects. 267 | - Array USED, if non-NULL, is permutated along MAPS. If FOR_FINI this is 268 | - called for finishing an object. */ 269 | -void 270 | -_dl_sort_maps (struct link_map **maps, unsigned int nmaps, char *used, 271 | - bool for_fini) 272 | +/* We use a recursive function due to its better clarity and ease of 273 | + implementation, as well as faster execution speed. 
We already use 274 | + alloca() for list allocation during the breadth-first search of 275 | + dependencies in _dl_map_object_deps(), and this should be on the 276 | + same order of worst-case stack usage. 277 | + 278 | + Note: the '*rpo' parameter is supposed to point to one past the 279 | + last element of the array where we save the sort results, and is 280 | + decremented before storing the current map at each level. */ 281 | + 282 | +static void 283 | +dfs_traversal (struct link_map ***rpo, struct link_map *map, 284 | + bool *do_reldeps) 285 | { 286 | - /* A list of one element need not be sorted. */ 287 | - if (nmaps <= 1) 288 | + if (map->l_visited) 289 | return; 290 | 291 | - unsigned int i = 0; 292 | - uint16_t seen[nmaps]; 293 | - memset (seen, 0, nmaps * sizeof (seen[0])); 294 | - while (1) 295 | - { 296 | - /* Keep track of which object we looked at this round. */ 297 | - ++seen[i]; 298 | - struct link_map *thisp = maps[i]; 299 | + map->l_visited = 1; 300 | 301 | - if (__glibc_unlikely (for_fini)) 302 | + if (map->l_initfini) 303 | + { 304 | + for (int i = 0; map->l_initfini[i] != NULL; i++) 305 | { 306 | - /* Do not handle ld.so in secondary namespaces and objects which 307 | - are not removed. */ 308 | - if (thisp != thisp->l_real || thisp->l_idx == -1) 309 | - goto skip; 310 | + struct link_map *dep = map->l_initfini[i]; 311 | + if (dep->l_visited == 0 312 | + && dep->l_main_map == 0) 313 | + dfs_traversal (rpo, dep, do_reldeps); 314 | } 315 | + } 316 | 317 | - /* Find the last object in the list for which the current one is 318 | - a dependency and move the current object behind the object 319 | - with the dependency. */ 320 | - unsigned int k = nmaps - 1; 321 | - while (k > i) 322 | + if (__glibc_unlikely (do_reldeps != NULL && map->l_reldeps != NULL)) 323 | + { 324 | + /* Indicate that we encountered relocation dependencies during 325 | + traversal. 
*/ 326 | + *do_reldeps = true; 327 | + 328 | + for (int m = map->l_reldeps->act - 1; m >= 0; m--) 329 | { 330 | - struct link_map **runp = maps[k]->l_initfini; 331 | - if (runp != NULL) 332 | - /* Look through the dependencies of the object. */ 333 | - while (*runp != NULL) 334 | - if (__glibc_unlikely (*runp++ == thisp)) 335 | - { 336 | - move: 337 | - /* Move the current object to the back past the last 338 | - object with it as the dependency. */ 339 | - memmove (&maps[i], &maps[i + 1], 340 | - (k - i) * sizeof (maps[0])); 341 | - maps[k] = thisp; 342 | - 343 | - if (used != NULL) 344 | - { 345 | - char here_used = used[i]; 346 | - memmove (&used[i], &used[i + 1], 347 | - (k - i) * sizeof (used[0])); 348 | - used[k] = here_used; 349 | - } 350 | - 351 | - if (seen[i + 1] > nmaps - i) 352 | - { 353 | - ++i; 354 | - goto next_clear; 355 | - } 356 | - 357 | - uint16_t this_seen = seen[i]; 358 | - memmove (&seen[i], &seen[i + 1], (k - i) * sizeof (seen[0])); 359 | - seen[k] = this_seen; 360 | - 361 | - goto next; 362 | - } 363 | - 364 | - if (__glibc_unlikely (for_fini && maps[k]->l_reldeps != NULL)) 365 | - { 366 | - unsigned int m = maps[k]->l_reldeps->act; 367 | - struct link_map **relmaps = &maps[k]->l_reldeps->list[0]; 368 | - 369 | - /* Look through the relocation dependencies of the object. */ 370 | - while (m-- > 0) 371 | - if (__glibc_unlikely (relmaps[m] == thisp)) 372 | - { 373 | - /* If a cycle exists with a link time dependency, 374 | - preserve the latter. 
*/ 375 | - struct link_map **runp = thisp->l_initfini; 376 | - if (runp != NULL) 377 | - while (*runp != NULL) 378 | - if (__glibc_unlikely (*runp++ == maps[k])) 379 | - goto ignore; 380 | - goto move; 381 | - } 382 | - ignore:; 383 | - } 384 | - 385 | - --k; 386 | + struct link_map *dep = map->l_reldeps->list[m]; 387 | + if (dep->l_visited == 0 388 | + && dep->l_main_map == 0) 389 | + dfs_traversal (rpo, dep, do_reldeps); 390 | } 391 | + } 392 | + 393 | + *rpo -= 1; 394 | + **rpo = map; 395 | +} 396 | 397 | - skip: 398 | - if (++i == nmaps) 399 | - break; 400 | - next_clear: 401 | - memset (&seen[i], 0, (nmaps - i) * sizeof (seen[0])); 402 | +/* Topologically sort array MAPS according to dependencies of the contained 403 | + objects. */ 404 | 405 | - next:; 406 | +static void 407 | +_dl_sort_maps_dfs (struct link_map **maps, unsigned int nmaps, 408 | + unsigned int skip __attribute__ ((unused)), bool for_fini) 409 | +{ 410 | + for (int i = nmaps - 1; i >= 0; i--) 411 | + maps[i]->l_visited = 0; 412 | + 413 | + /* We apply DFS traversal for each of maps[i] until the whole total order 414 | + is found and we're at the start of the Reverse-Postorder (RPO) sequence, 415 | + which is a topological sort. 416 | + 417 | + We go from maps[nmaps - 1] backwards towards maps[0] at this level. 418 | + Due to the breadth-first search (BFS) ordering we receive, going 419 | + backwards usually gives a more shallow depth-first recursion depth, 420 | + adding more stack usage safety. Also, combined with the natural 421 | + processing order of l_initfini[] at each node during DFS, this maintains 422 | + an ordering closer to the original link ordering in the sorting results 423 | + under most simpler cases. 424 | + 425 | + Another reason we order the top level backwards, it that maps[0] is 426 | + usually exactly the main object of which we're in the midst of 427 | + _dl_map_object_deps() processing, and maps[0]->l_initfini[] is still 428 | + blank. 
If we start the traversal from maps[0], since having no 429 | + dependencies yet filled in, maps[0] will always be immediately 430 | + incorrectly placed at the last place in the order (first in reverse). 431 | + Adjusting the order so that maps[0] is last traversed naturally avoids 432 | + this problem. 433 | + 434 | + Further, the old "optimization" of skipping the main object at maps[0] 435 | + from the call-site (i.e. _dl_sort_maps(maps+1,nmaps-1)) is in general 436 | + no longer valid, since traversing along object dependency-links 437 | + may "find" the main object even when it is not included in the initial 438 | + order (e.g. a dlopen()'ed shared object can have circular dependencies 439 | + linked back to itself). In such a case, traversing N-1 objects will 440 | + create a N-object result, and raise problems. 441 | + 442 | + To summarize, just passing in the full list, and iterating from back 443 | + to front makes things much more straightforward. */ 444 | + 445 | + /* Array to hold RPO sorting results, before we copy back to maps[]. */ 446 | + struct link_map *rpo[nmaps]; 447 | + 448 | + /* The 'head' position during each DFS iteration. Note that we start at 449 | + one past the last element due to first-decrement-then-store (see the 450 | + bottom of above dfs_traversal() routine). */ 451 | + struct link_map **rpo_head = &rpo[nmaps]; 452 | + 453 | + bool do_reldeps = false; 454 | + bool *do_reldeps_ref = (for_fini ? &do_reldeps : NULL); 455 | + 456 | + for (int i = nmaps - 1; i >= 0; i--) 457 | + { 458 | + dfs_traversal (&rpo_head, maps[i], do_reldeps_ref); 459 | + 460 | + /* We can break early if all objects are already placed. */ 461 | + if (rpo_head == rpo) 462 | + goto end; 463 | + } 464 | + assert (rpo_head == rpo); 465 | + 466 | + end: 467 | + /* Here we may do a second pass of sorting, using only l_initfini[] 468 | + static dependency links. This is avoided if !FOR_FINI or if we didn't 469 | + find any reldeps in the first DFS traversal. 
470 | + 471 | + The reason we do this is: while it is unspecified how circular 472 | + dependencies should be handled, the presumed reasonable behavior is to 473 | + have destructors to respect static dependency links as much as possible, 474 | + overriding reldeps if needed. And the first sorting pass, which takes 475 | + l_initfini/l_reldeps links equally, may not preserve this priority. 476 | + 477 | + Hence we do a 2nd sorting pass, taking only DT_NEEDED links into account 478 | + (see how the do_reldeps argument to dfs_traversal() is NULL below). */ 479 | + if (do_reldeps) 480 | + { 481 | + for (int i = nmaps - 1; i >= 0; i--) 482 | + rpo[i]->l_visited = 0; 483 | + 484 | + struct link_map **maps_head = &maps[nmaps]; 485 | + for (int i = nmaps - 1; i >= 0; i--) 486 | + { 487 | + dfs_traversal (&maps_head, rpo[i], NULL); 488 | + 489 | + /* We can break early if all objects are already placed. 490 | + The below memcpy is not needed in the do_reldeps case here, 491 | + since we wrote back to maps[] during DFS traversal. */ 492 | + if (maps_head == maps) 493 | + return; 494 | + } 495 | + assert (maps_head == maps); 496 | + return; 497 | } 498 | + 499 | + memcpy (maps, rpo, sizeof (struct link_map *) * nmaps); 500 | +} 501 | + 502 | +void 503 | +_dl_sort_maps (struct link_map **maps, unsigned int nmaps, 504 | + unsigned int skip, bool for_fini) 505 | +{ 506 | + /* It can be tempting to use a static function pointer to store and call 507 | + the current selected sorting algorithm routine, but experimentation 508 | + shows that current processors still do not handle indirect branches 509 | + that efficiently, plus a static function pointer will involve 510 | + PTR_MANGLE/DEMANGLE, further impairing performance of small, common 511 | + input cases. A simple if-case with direct function calls appears to 512 | + be the fastest. 
*/ 513 | + _dl_sort_maps_dfs (maps, nmaps, skip, for_fini); 514 | } 515 | diff --git a/elf/rtld.c b/elf/rtld.c 516 | index e0752eb..e1d056b 100644 517 | --- a/elf/rtld.c 518 | +++ b/elf/rtld.c 519 | @@ -1340,6 +1340,9 @@ of this helper program; chances are you did not intend to run this program.\n\ 520 | main_map->l_name = (char *) ""; 521 | *user_entry = main_map->l_entry; 522 | 523 | + /* Set bit indicating this is the main program map. */ 524 | + main_map->l_main_map = 1; 525 | + 526 | #ifdef HAVE_AUX_VECTOR 527 | /* Adjust the on-stack auxiliary vector so that it looks like the 528 | binary was executed directly. */ 529 | diff --git a/include/link.h b/include/link.h 530 | index aea2684..3a87694 100644 531 | --- a/include/link.h 532 | +++ b/include/link.h 533 | @@ -177,6 +177,11 @@ struct link_map 534 | unsigned int l_init_called:1; /* Nonzero if DT_INIT function called. */ 535 | unsigned int l_global:1; /* Nonzero if object in _dl_global_scope. */ 536 | unsigned int l_reserved:2; /* Reserved for internal use. */ 537 | + unsigned int l_main_map:1; /* Nonzero for the map of the main program. */ 538 | + unsigned int l_visited:1; /* Used internally for map dependency 539 | + graph traversal. */ 540 | + unsigned int l_map_used:1; /* These two bits are used during traversal */ 541 | + unsigned int l_map_done:1; /* of maps in _dl_close_worker. */ 542 | unsigned int l_phdr_allocated:1; /* Nonzero if the data structure pointed 543 | to by `l_phdr' is allocated. */ 544 | unsigned int l_soname_added:1; /* Nonzero if the SONAME is for sure in 545 | diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h 546 | index d7e1515..5bed748 100644 547 | --- a/sysdeps/generic/ldsodefs.h 548 | +++ b/sysdeps/generic/ldsodefs.h 549 | @@ -1010,7 +1010,7 @@ extern void _dl_fini (void) attribute_hidden; 550 | 551 | /* Sort array MAPS according to dependencies of the contained objects. 
*/ 552 | extern void _dl_sort_maps (struct link_map **maps, unsigned int nmaps, 553 | - char *used, bool for_fini) attribute_hidden; 554 | + unsigned int skip, bool for_fini) attribute_hidden; 555 | 556 | /* The dynamic linker calls this function before and having changing 557 | any shared object mappings. The `r_state' member of `struct r_debug' 558 | -------------------------------------------------------------------------------- /patches/debian/2.31/unsubmitted-mathworks-bz19329-1-of-2.v2.31.patch: -------------------------------------------------------------------------------- 1 | From: Szabolcs Nagy 2 | To: GNU C Library 3 | Cc: , Torvald Riegel 4 | Date: Wed, 30 Nov 2016 11:44:25 +0000 5 | Subject: [RFC PATCH 1/2][BZ 19329] remove broken code path for easier code review 6 | 7 | This patch is not necessary for the bug fix, just makes concurrency 8 | code review easier (removes a data race and overflow from a broken 9 | code path). 10 | 11 | dlopen can oom crash anyway in _dl_resize_dtv and it's probably 12 | better to crash than leave half setup modules around. 13 | 14 | 2016-11-30 Szabolcs Nagy 15 | 16 | * elf/dl-tls.c (_dl_add_to_slotinfo): OOM crash. 17 | 18 | --- a/elf/dl-tls.c 19 | +++ b/elf/dl-tls.c 20 | @@ -918,18 +918,10 @@ _dl_add_to_slotinfo (struct link_map *l) 21 | + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); 22 | if (listp == NULL) 23 | { 24 | - /* We ran out of memory. We will simply fail this 25 | - call but don't undo anything we did so far. The 26 | - application will crash or be terminated anyway very 27 | - soon. */ 28 | - 29 | - /* We have to do this since some entries in the dtv 30 | - slotinfo array might already point to this 31 | - generation. */ 32 | - ++GL(dl_tls_generation); 33 | - 34 | - _dl_signal_error (ENOMEM, "dlopen", NULL, N_("\ 35 | -cannot create TLS data structures")); 36 | + /* We ran out of memory in dlopen while updating tls structures. 
37 | + TODO: side-effects should be rolled back and the failure should 38 | + be reported to the caller, but that's hard. */ 39 | + oom (); 40 | } 41 | 42 | listp->len = TLS_SLOTINFO_SURPLUS; 43 | -------------------------------------------------------------------------------- /patches/debian/2.31/unsubmitted-mathworks-bz19329-2-of-2.v2.31.patch: -------------------------------------------------------------------------------- 1 | The content of this file is derived from original patches published to libc-alpha at sourceware dot org 2 | by Szabolcs Nagy. The original, unmodified, version is available in the patches/debian/2.27 folder of 3 | this repository or from https://sourceware.org/legacy-ml/libc-alpha/2016-11/msg01093.html 4 | 5 | The original content has been adapted by MathWorks to enable patching glibc 2.31 since there were 6 | source code changes in glibc since the original patches were constructed 7 | 8 | NOTE: ADAPTED TO PATCH v2.31 via quilt 9 | 10 | From: Szabolcs Nagy 11 | To: GNU C Library , Torvald Riegel 12 | Cc: 13 | Date: Wed, 30 Nov 2016 11:44:32 +0000 14 | Subject: [RFC PATCH 2/2][BZ 19329] Fix data races between pthread_create and dlopen 15 | 16 | This fixes a subset of the issues described in 17 | https://sourceware.org/ml/libc-alpha/2016-11/msg01026.html 18 | without adding locks to pthread_create. 19 | 20 | Only races between dlopen and pthread_create were considered, 21 | and the asserts got removed that tried to check for concurrency 22 | issues. 23 | 24 | The patch is incomplete because dlclose, tls access and 25 | dl_iterate_phdr related code paths are not modified. 26 | 27 | dlclose should be updated in a similar fashion to dlopen 28 | to make the patch complete alternatively pthread_create 29 | may take the GL(dl_load_write_lock) to sync with dlclose 30 | or the GL(dl_load_lock) to sync with dlopen and dlclose 31 | (that would simplify the concurrency design, but increase 32 | lock contention on the locks). 
33 | 34 | 2016-11-30 Szabolcs Nagy 35 | 36 | [BZ #19329] 37 | * elf/dl-open.c (dl_open_worker): Write GL(dl_tls_generation) 38 | atomically. 39 | * elf/dl-tls.c (_dl_allocate_tls_init): Read GL(dl_tls_generation), 40 | GL(dl_tls_max_dtv_idx), slotinfo entries and listp->next atomically. 41 | Remove assertions that cannot be guaranteed. 42 | (_dl_add_to_slotinfo): Write the slotinfo entries and listp->next 43 | atomically. 44 | 45 | --- a/elf/dl-open.c 46 | +++ b/elf/dl-open.c 47 | @@ -387,9 +387,14 @@ update_tls_slotinfo (struct link_map *ne 48 | } 49 | } 50 | 51 | - if (__builtin_expect (++GL(dl_tls_generation) == 0, 0)) 52 | + /* This cannot be in a data-race so non-atomic load is valid too. */ 53 | + size_t newgen = atomic_load_relaxed (&GL(dl_tls_generation)) + 1; 54 | + /* Synchronize with _dl_allocate_tls_init (see notes there) and 55 | + avoid storing an overflowed counter. */ 56 | + if (__builtin_expect (newgen == 0, 0)) 57 | _dl_fatal_printf (N_("\ 58 | TLS generation counter wrapped! Please report this.")); 59 | + atomic_store_release (&GL(dl_tls_generation), newgen); 60 | 61 | /* We need a second pass for static tls data, because 62 | _dl_update_slotinfo must not be run while calls to 63 | --- a/elf/dl-tls.c 64 | +++ b/elf/dl-tls.c 65 | @@ -489,6 +489,36 @@ _dl_resize_dtv (dtv_t *dtv) 66 | } 67 | 68 | 69 | +/* 70 | +CONCURRENCY NOTES 71 | + 72 | +dlopen (and dlclose) holds the GL(dl_load_lock) while writing shared state, 73 | +which may be concurrently read by pthread_create and tls access without taking 74 | +the lock, so atomic access should be used. The shared state: 75 | + 76 | + GL(dl_tls_max_dtv_idx) - max modid assigned, (modid can be reused). 77 | + GL(dl_tls_generation) - generation count, incremented by dlopen and dlclose. 78 | + GL(dl_tls_dtv_slotinfo_list) - list of entries, contains generation count 79 | + and link_map for each module with a modid. 
80 | + 81 | +A module gets a modid assigned if it has tls, a modid identifies a slotinfo 82 | +entry and it is the index of the corresponding dtv slot. The generation count 83 | +is assigned to slotinfo entries of a newly loaded or unloaded module and its 84 | +newly loaded or unloaded dependencies. 85 | + 86 | +TODO: dlclose may free memory read by a concurrent pthread_create or tls 87 | +access. This is broken now, so it is assumed that dlclose does not free 88 | +link_map structures while pthread_create or __tls_get_addr is reading them. 89 | + 90 | +pthread_create calls _dl_allocate_tls_init (before creating the new thread), 91 | +which should guarantee that the dtv is in a consistent state at the end: 92 | + 93 | +All slotinfo updates with generation <= dtv[0].counter are reflected in the 94 | +dtv and arbitrary later module unloads may also be reflected as unallocated 95 | +entries. (Note: a modid reuse implies a module unload and accessing tls in 96 | +an unloaded module is undefined.) 97 | +*/ 98 | + 99 | void * 100 | _dl_allocate_tls_init (void *result) 101 | { 102 | @@ -500,12 +530,24 @@ _dl_allocate_tls_init (void *result) 103 | struct dtv_slotinfo_list *listp; 104 | size_t total = 0; 105 | size_t maxgen = 0; 106 | + /* Synchronizes with the increments in dl_{open,close}_worker. 107 | + Slotinfo updates of this generation are sequenced before the 108 | + write we read from here. */ 109 | + size_t gen_count = atomic_load_acquire (&GL(dl_tls_generation)); 110 | + /* Either reads from the last write that is sequenced before the 111 | + generation counter increment we synchronized with or a write 112 | + made by a later dlopen/dlclose. dlclose may decrement this, 113 | + but only if related modules are unloaded. So it is an upper 114 | + bound on non-unloaded modids up to gen_count generation. */ 115 | + size_t dtv_slots = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx)); 116 | 117 | /* Check if the current dtv is big enough. 
*/ 118 | - if (dtv[-1].counter < GL(dl_tls_max_dtv_idx)) 119 | + if (dtv[-1].counter < dtv_slots) 120 | { 121 | /* Resize the dtv. */ 122 | dtv = _dl_resize_dtv (dtv); 123 | + /* _dl_resize_dtv rereads GL(dl_tls_max_dtv_idx) which may decrease. */ 124 | + dtv_slots = dtv[-1].counter; 125 | 126 | /* Install this new dtv in the thread data structures. */ 127 | INSTALL_DTV (result, &dtv[-1]); 128 | @@ -522,22 +564,33 @@ _dl_allocate_tls_init (void *result) 129 | for (cnt = total == 0 ? 1 : 0; cnt < listp->len; ++cnt) 130 | { 131 | struct link_map *map; 132 | + size_t gen; 133 | void *dest; 134 | 135 | /* Check for the total number of used slots. */ 136 | - if (total + cnt > GL(dl_tls_max_dtv_idx)) 137 | + if (total + cnt > dtv_slots) 138 | break; 139 | 140 | - map = listp->slotinfo[cnt].map; 141 | + /* Synchronize with dl_add_to_slotinfo and remove_slotinfo. */ 142 | + map = atomic_load_acquire (&listp->slotinfo[cnt].map); 143 | if (map == NULL) 144 | /* Unused entry. */ 145 | continue; 146 | 147 | + /* Consistent generation count with the map read above. 148 | + Inconsistent gen may be read if the entry is being reused, 149 | + in which case it is larger than gen_count and we skip it. */ 150 | + gen = atomic_load_relaxed (&listp->slotinfo[cnt].gen); 151 | + if (gen > gen_count) 152 | + /* New entry. */ 153 | + continue; 154 | + 155 | /* Keep track of the maximum generation number. This might 156 | not be the generation counter. */ 157 | - assert (listp->slotinfo[cnt].gen <= GL(dl_tls_generation)); 158 | - maxgen = MAX (maxgen, listp->slotinfo[cnt].gen); 159 | + maxgen = MAX (maxgen, gen); 160 | 161 | + /* TODO: concurrent dlclose may free map which would break 162 | + the rest of the code below. 
*/ 163 | dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED; 164 | dtv[map->l_tls_modid].pointer.to_free = NULL; 165 | 166 | @@ -567,11 +620,15 @@ _dl_allocate_tls_init (void *result) 167 | } 168 | 169 | total += cnt; 170 | - if (total >= GL(dl_tls_max_dtv_idx)) 171 | + if (total >= dtv_slots) 172 | break; 173 | 174 | - listp = listp->next; 175 | - assert (listp != NULL); 176 | + /* Synchronize with dl_add_to_slotinfo. */ 177 | + listp = atomic_load_acquire (&listp->next); 178 | + /* dtv_slots is an upper bound on the number of entries we care 179 | + about, the list may end sooner. */ 180 | + if (listp == NULL) 181 | + break; 182 | } 183 | 184 | /* The DTV version is up-to-date now. */ 185 | @@ -970,7 +1027,7 @@ _dl_add_to_slotinfo (struct link_map *l, 186 | the first slot. */ 187 | assert (idx == 0); 188 | 189 | - listp = prevp->next = (struct dtv_slotinfo_list *) 190 | + listp = (struct dtv_slotinfo_list *) 191 | malloc (sizeof (struct dtv_slotinfo_list) 192 | + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); 193 | if (listp == NULL) 194 | @@ -985,12 +1042,19 @@ _dl_add_to_slotinfo (struct link_map *l, 195 | listp->next = NULL; 196 | memset (listp->slotinfo, '\0', 197 | TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); 198 | + /* Add the new list item and synchronize with _dl_allocate_tls_init. */ 199 | + atomic_store_release (&prevp->next, listp); 200 | } 201 | 202 | /* Add the information into the slotinfo data structure. */ 203 | if (do_add) 204 | { 205 | - listp->slotinfo[idx].map = l; 206 | - listp->slotinfo[idx].gen = GL(dl_tls_generation) + 1; 207 | + /* This cannot be in a data-race so non-atomic load would be valid too. */ 208 | + size_t newgen = atomic_load_relaxed (&GL(dl_tls_generation)) + 1; 209 | + /* TODO: Concurrent readers may see an overflowed gen, which is bad, 210 | + but overflow is guaranteed to crash the dlopen that is executing. 
*/ 211 | + atomic_store_relaxed (&listp->slotinfo[idx].gen, newgen); 212 | + /* Synchronize with _dl_allocate_tls_init (see notes there). */ 213 | + atomic_store_release (&listp->slotinfo[idx].map, l); 214 | } 215 | } 216 | -------------------------------------------------------------------------------- /patches/debian/2.31/unsubmitted-mathworks-bz19329-fixup.v2.31.patch: -------------------------------------------------------------------------------- 1 | Fix off-by-one error listed in https://sourceware.org/bugzilla/show_bug.cgi?id=19329#c9 2 | --- a/elf/dl-tls.c 3 | +++ b/elf/dl-tls.c 4 | @@ -620,7 +620,7 @@ _dl_allocate_tls_init (void *result) 5 | } 6 | 7 | total += cnt; 8 | - if (total >= dtv_slots) 9 | + if (total > dtv_slots) 10 | break; 11 | 12 | /* Synchronize with dl_add_to_slotinfo. */ 13 | -------------------------------------------------------------------------------- /patches/debian/2.32: -------------------------------------------------------------------------------- 1 | 2.31 -------------------------------------------------------------------------------- /patches/debian/2.33: -------------------------------------------------------------------------------- 1 | 2.31 -------------------------------------------------------------------------------- /patches/rhel/2.28-189/unsubmitted-mathworks-0-bz17645.v2.28-rhel.patch: -------------------------------------------------------------------------------- 1 | **************************ORIGINAL HEADER***************************** 2 | From patchwork Thu Oct 21 13:41:22 2021 3 | Content-Type: text/plain; charset="utf-8" 4 | MIME-Version: 1.0 5 | Content-Transfer-Encoding: 7bit 6 | X-Patchwork-Submitter: Chung-Lin Tang 7 | X-Patchwork-Id: 46497 8 | Return-Path: 9 | X-Original-To: patchwork@sourceware.org 10 | Delivered-To: patchwork@sourceware.org 11 | Received: from server2.sourceware.org (localhost [IPv6:::1]) 12 | by sourceware.org (Postfix) with ESMTP id B66BB3857801 13 | for ; Thu, 21 Oct 2021 13:41:58 +0000 
(GMT) 14 | X-Original-To: libc-alpha@sourceware.org 15 | Delivered-To: libc-alpha@sourceware.org 16 | Received: from seed.net.tw (sn15.seed.net.tw [139.175.54.15]) 17 | by sourceware.org (Postfix) with ESMTP id 3B6AC3858405 18 | for ; Thu, 21 Oct 2021 13:41:41 +0000 (GMT) 19 | DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 3B6AC3858405 20 | Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) 21 | header.from=codesourcery.com 22 | Authentication-Results: sourceware.org; 23 | spf=none smtp.mailfrom=codesourcery.com 24 | Received: from [112.104.15.59] (port=35230 helo=localhost.localdomain) 25 | by seed.net.tw with esmtp (Seednet 4.69:2) 26 | id 1mdYK9-000NfF-1W; Thu, 21 Oct 2021 21:41:37 +0800 27 | From: Chung-Lin Tang 28 | To: libc-alpha@sourceware.org, 29 | Adhemerval Zanella 30 | Subject: [PATCH v8 2/2] elf: Fix slow DSO sorting behavior in dynamic loader 31 | (BZ #17645) 32 | Date: Thu, 21 Oct 2021 21:41:22 +0800 33 | Message-Id: <20211021134122.3141-2-cltang@codesourcery.com> 34 | X-Mailer: git-send-email 2.17.1 35 | In-Reply-To: <20211021134122.3141-1-cltang@codesourcery.com> 36 | References: <20211021134122.3141-1-cltang@codesourcery.com> 37 | X-Spam-Status: No, score=-19.6 required=5.0 tests=BAYES_00, FORGED_SPF_HELO, 38 | GIT_PATCH_0, KAM_DMARC_STATUS, KAM_LAZY_DOMAIN_SECURITY, KAM_SHORT, 39 | RCVD_IN_DNSWL_LOW, SPF_HELO_PASS, SPF_NONE, 40 | TXREP autolearn=ham autolearn_force=no version=3.4.4 41 | X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on 42 | server2.sourceware.org 43 | X-BeenThere: libc-alpha@sourceware.org 44 | X-Mailman-Version: 2.1.29 45 | Precedence: list 46 | List-Id: Libc-alpha mailing list 47 | List-Unsubscribe: , 48 | 49 | List-Archive: 50 | List-Post: 51 | List-Help: 52 | List-Subscribe: , 53 | 54 | Errors-To: libc-alpha-bounces+patchwork=sourceware.org@sourceware.org 55 | Sender: "Libc-alpha" 56 | 57 | 58 | This second patch contains the actual implementation of a new sorting algorithm 59 | for shared 
objects in the dynamic loader, which solves the slow behavior that 60 | the current "old" algorithm falls into when the DSO set contains circular 61 | dependencies. 62 | 63 | The new algorithm implemented here is simply depth-first search (DFS) to obtain 64 | the Reverse-Post Order (RPO) sequence, a topological sort. A new l_visited:1 65 | bitfield is added to struct link_map to more elegantly facilitate such a search. 66 | 67 | The DFS algorithm is applied to the input maps[nmap-1] backwards towards 68 | maps[0]. This has the effect of a more "shallow" recursion depth in general 69 | since the input is in BFS. Also, when combined with the natural order of 70 | processing l_initfini[] at each node, this creates a resulting output sorting 71 | closer to the intuitive "left-to-right" order in most cases. 72 | 73 | Another notable implementation adjustment related to this _dl_sort_maps change 74 | is the removing of two char arrays 'used' and 'done' in _dl_close_worker to 75 | represent two per-map attributes. This has been changed to simply use two new 76 | bit-fields l_map_used:1, l_map_done:1 added to struct link_map. This also allows 77 | discarding the clunky 'used' array sorting that _dl_sort_maps had to sometimes 78 | do along the way. 79 | 80 | Tunable support for switching between different sorting algorithms at runtime is 81 | also added. A new tunable 'glibc.rtld.dynamic_sort' with current valid values 1 82 | (old algorithm) and 2 (new DFS algorithm) has been added. At time of commit 83 | of this patch, the default setting is 1 (old algorithm). 
84 | 85 | Signed-off-by: Chung-Lin Tang 86 | Reviewed-by: Adhemerval Zanella 87 | ********************************************************************************** 88 | 89 | Mathworks 90 | Backport to glibc 2.28 for RHEL, remove tunable support and set default to the DFS sort map behavior 91 | 92 | Coded-by: Alan Li 93 | 94 | 95 | diff --git a/elf/dl-close.c b/elf/dl-close.c 96 | index 73b2817..cfad816 100644 97 | --- a/elf/dl-close.c 98 | +++ b/elf/dl-close.c 99 | @@ -164,8 +164,6 @@ _dl_close_worker (struct link_map *map, bool force) 100 | 101 | bool any_tls = false; 102 | const unsigned int nloaded = ns->_ns_nloaded; 103 | - char used[nloaded]; 104 | - char done[nloaded]; 105 | struct link_map *maps[nloaded]; 106 | 107 | /* Run over the list and assign indexes to the link maps and enter 108 | @@ -173,24 +171,21 @@ _dl_close_worker (struct link_map *map, bool force) 109 | int idx = 0; 110 | for (struct link_map *l = ns->_ns_loaded; l != NULL; l = l->l_next) 111 | { 112 | - l->l_idx = idx; 113 | + l->l_map_used = 0; 114 | + l->l_map_done = 0; 115 | + l->l_idx = idx; 116 | maps[idx] = l; 117 | ++idx; 118 | - 119 | } 120 | assert (idx == nloaded); 121 | 122 | - /* Prepare the bitmaps. */ 123 | - memset (used, '\0', sizeof (used)); 124 | - memset (done, '\0', sizeof (done)); 125 | - 126 | /* Keep track of the lowest index link map we have covered already. */ 127 | int done_index = -1; 128 | while (++done_index < nloaded) 129 | { 130 | struct link_map *l = maps[done_index]; 131 | 132 | - if (done[done_index]) 133 | + if (l->l_map_done) 134 | /* Already handled. */ 135 | continue; 136 | 137 | @@ -201,12 +196,12 @@ _dl_close_worker (struct link_map *map, bool force) 138 | /* See CONCURRENCY NOTES in cxa_thread_atexit_impl.c to know why 139 | acquire is sufficient and correct. */ 140 | && atomic_load_acquire (&l->l_tls_dtor_count) == 0 141 | - && !used[done_index]) 142 | + && !l->l_map_used) 143 | continue; 144 | 145 | /* We need this object and we handle it now. 
*/ 146 | - done[done_index] = 1; 147 | - used[done_index] = 1; 148 | + l->l_map_used = 1; 149 | + l->l_map_done = 1; 150 | /* Signal the object is still needed. */ 151 | l->l_idx = IDX_STILL_USED; 152 | 153 | @@ -222,9 +217,9 @@ _dl_close_worker (struct link_map *map, bool force) 154 | { 155 | assert ((*lp)->l_idx >= 0 && (*lp)->l_idx < nloaded); 156 | 157 | - if (!used[(*lp)->l_idx]) 158 | + if (!(*lp)->l_map_used) 159 | { 160 | - used[(*lp)->l_idx] = 1; 161 | + (*lp)->l_map_used = 1; 162 | /* If we marked a new object as used, and we've 163 | already processed it, then we need to go back 164 | and process again from that point forward to 165 | @@ -247,9 +242,9 @@ _dl_close_worker (struct link_map *map, bool force) 166 | { 167 | assert (jmap->l_idx >= 0 && jmap->l_idx < nloaded); 168 | 169 | - if (!used[jmap->l_idx]) 170 | + if (!jmap->l_map_used) 171 | { 172 | - used[jmap->l_idx] = 1; 173 | + jmap->l_map_used = 1; 174 | if (jmap->l_idx - 1 < done_index) 175 | done_index = jmap->l_idx - 1; 176 | } 177 | @@ -259,8 +254,7 @@ _dl_close_worker (struct link_map *map, bool force) 178 | 179 | /* Sort the entries. We can skip looking for the binary itself which is 180 | at the front of the search list for the main namespace. */ 181 | - _dl_sort_maps (maps + (nsid == LM_ID_BASE), nloaded - (nsid == LM_ID_BASE), 182 | - used + (nsid == LM_ID_BASE), true); 183 | + _dl_sort_maps (maps, nloaded, (nsid == LM_ID_BASE), true); 184 | 185 | /* Call all termination functions at once. */ 186 | #ifdef SHARED 187 | @@ -277,7 +271,7 @@ _dl_close_worker (struct link_map *map, bool force) 188 | /* All elements must be in the same namespace. */ 189 | assert (imap->l_ns == nsid); 190 | 191 | - if (!used[i]) 192 | + if (!imap->l_map_used) 193 | { 194 | assert (imap->l_type == lt_loaded && !imap->l_nodelete_active); 195 | 196 | @@ -330,7 +324,7 @@ _dl_close_worker (struct link_map *map, bool force) 197 | if (i < first_loaded) 198 | first_loaded = i; 199 | } 200 | - /* Else used[i]. 
*/ 201 | + /* Else imap->l_map_used. */ 202 | else if (imap->l_type == lt_loaded) 203 | { 204 | struct r_scope_elem *new_list = NULL; 205 | @@ -554,7 +548,7 @@ _dl_close_worker (struct link_map *map, bool force) 206 | for (unsigned int i = first_loaded; i < nloaded; ++i) 207 | { 208 | struct link_map *imap = maps[i]; 209 | - if (!used[i]) 210 | + if (!imap->l_map_used) 211 | { 212 | assert (imap->l_type == lt_loaded); 213 | 214 | diff --git a/elf/dl-deps.c b/elf/dl-deps.c 215 | index 087a49b212..237d9636c5 100644 216 | --- a/elf/dl-deps.c 217 | +++ b/elf/dl-deps.c 218 | @@ -613,10 +613,9 @@ Filters not supported with LD_TRACE_PRELINKING")); 219 | 220 | /* If libc.so.6 is the main map, it participates in the sort, so 221 | that the relocation order is correct regarding libc.so.6. */ 222 | - if (l_initfini[0] == GL (dl_ns)[l_initfini[0]->l_ns].libc_map) 223 | - _dl_sort_maps (l_initfini, nlist, NULL, false); 224 | - else 225 | - _dl_sort_maps (&l_initfini[1], nlist - 1, NULL, false); 226 | + _dl_sort_maps (l_initfini, nlist, 227 | + (l_initfini[0] != GL (dl_ns)[l_initfini[0]->l_ns].libc_map), 228 | + false); 229 | 230 | /* Terminate the list of dependencies. */ 231 | l_initfini[nlist] = NULL; 232 | diff --git a/elf/dl-fini.c b/elf/dl-fini.c 233 | index 226a6f0..afecbd1 100644 234 | --- a/elf/dl-fini.c 235 | +++ b/elf/dl-fini.c 236 | @@ -91,8 +91,7 @@ _dl_fini (void) 237 | /* Now we have to do the sorting. We can skip looking for the 238 | binary itself which is at the front of the search list for 239 | the main namespace. */ 240 | - _dl_sort_maps (maps + (ns == LM_ID_BASE), nmaps - (ns == LM_ID_BASE), 241 | - NULL, true); 242 | + _dl_sort_maps (maps, nmaps, (ns == LM_ID_BASE), true); 243 | 244 | /* We do not rely on the linked list of loaded object anymore 245 | from this point on. We have our own list here (maps). 
The 246 | diff --git a/elf/dl-sort-maps.c b/elf/dl-sort-maps.c 247 | index b2a01ed..78d8693 100644 248 | --- a/elf/dl-sort-maps.c 249 | +++ b/elf/dl-sort-maps.c 250 | @@ -1,5 +1,5 @@ 251 | /* Sort array of link maps according to dependencies. 252 | - Copyright (C) 2017-2018 Free Software Foundation, Inc. 253 | + Copyright (C) 2017-2022 Free Software Foundation, Inc. 254 | This file is part of the GNU C Library. 255 | 256 | The GNU C Library is free software; you can redistribute it and/or 257 | @@ -14,109 +14,172 @@ 258 | 259 | You should have received a copy of the GNU Lesser General Public 260 | License along with the GNU C Library; if not, see 261 | - . */ 262 | + . */ 263 | 264 | +#include 265 | #include 266 | 267 | 268 | -/* Sort array MAPS according to dependencies of the contained objects. 269 | - Array USED, if non-NULL, is permutated along MAPS. If FOR_FINI this is 270 | - called for finishing an object. */ 271 | -void 272 | -_dl_sort_maps (struct link_map **maps, unsigned int nmaps, char *used, 273 | - bool for_fini) 274 | +/* We use a recursive function due to its better clarity and ease of 275 | + implementation, as well as faster execution speed. We already use 276 | + alloca() for list allocation during the breadth-first search of 277 | + dependencies in _dl_map_object_deps(), and this should be on the 278 | + same order of worst-case stack usage. 279 | + 280 | + Note: the '*rpo' parameter is supposed to point to one past the 281 | + last element of the array where we save the sort results, and is 282 | + decremented before storing the current map at each level. */ 283 | + 284 | +static void 285 | +dfs_traversal (struct link_map ***rpo, struct link_map *map, 286 | + bool *do_reldeps) 287 | { 288 | - /* A list of one element need not be sorted. */ 289 | - if (nmaps <= 1) 290 | + /* _dl_map_object_deps ignores l_faked objects when calculating the 291 | + number of maps before calling _dl_sort_maps, ignore them as well. 
*/ 292 | + if (map->l_visited || map->l_faked) 293 | return; 294 | 295 | - unsigned int i = 0; 296 | - uint16_t seen[nmaps]; 297 | - memset (seen, 0, nmaps * sizeof (seen[0])); 298 | - while (1) 299 | - { 300 | - /* Keep track of which object we looked at this round. */ 301 | - ++seen[i]; 302 | - struct link_map *thisp = maps[i]; 303 | + map->l_visited = 1; 304 | 305 | - if (__glibc_unlikely (for_fini)) 306 | + if (map->l_initfini) 307 | + { 308 | + for (int i = 0; map->l_initfini[i] != NULL; i++) 309 | { 310 | - /* Do not handle ld.so in secondary namespaces and objects which 311 | - are not removed. */ 312 | - if (thisp != thisp->l_real || thisp->l_idx == -1) 313 | - goto skip; 314 | + struct link_map *dep = map->l_initfini[i]; 315 | + if (dep->l_visited == 0 316 | + && dep->l_main_map == 0) 317 | + dfs_traversal (rpo, dep, do_reldeps); 318 | } 319 | + } 320 | 321 | - /* Find the last object in the list for which the current one is 322 | - a dependency and move the current object behind the object 323 | - with the dependency. */ 324 | - unsigned int k = nmaps - 1; 325 | - while (k > i) 326 | + if (__glibc_unlikely (do_reldeps != NULL && map->l_reldeps != NULL)) 327 | + { 328 | + /* Indicate that we encountered relocation dependencies during 329 | + traversal. */ 330 | + *do_reldeps = true; 331 | + 332 | + for (int m = map->l_reldeps->act - 1; m >= 0; m--) 333 | { 334 | - struct link_map **runp = maps[k]->l_initfini; 335 | - if (runp != NULL) 336 | - /* Look through the dependencies of the object. */ 337 | - while (*runp != NULL) 338 | - if (__glibc_unlikely (*runp++ == thisp)) 339 | - { 340 | - move: 341 | - /* Move the current object to the back past the last 342 | - object with it as the dependency. 
*/ 343 | - memmove (&maps[i], &maps[i + 1], 344 | - (k - i) * sizeof (maps[0])); 345 | - maps[k] = thisp; 346 | - 347 | - if (used != NULL) 348 | - { 349 | - char here_used = used[i]; 350 | - memmove (&used[i], &used[i + 1], 351 | - (k - i) * sizeof (used[0])); 352 | - used[k] = here_used; 353 | - } 354 | - 355 | - if (seen[i + 1] > nmaps - i) 356 | - { 357 | - ++i; 358 | - goto next_clear; 359 | - } 360 | - 361 | - uint16_t this_seen = seen[i]; 362 | - memmove (&seen[i], &seen[i + 1], (k - i) * sizeof (seen[0])); 363 | - seen[k] = this_seen; 364 | - 365 | - goto next; 366 | - } 367 | - 368 | - if (__glibc_unlikely (for_fini && maps[k]->l_reldeps != NULL)) 369 | - { 370 | - unsigned int m = maps[k]->l_reldeps->act; 371 | - struct link_map **relmaps = &maps[k]->l_reldeps->list[0]; 372 | - 373 | - /* Look through the relocation dependencies of the object. */ 374 | - while (m-- > 0) 375 | - if (__glibc_unlikely (relmaps[m] == thisp)) 376 | - { 377 | - /* If a cycle exists with a link time dependency, 378 | - preserve the latter. */ 379 | - struct link_map **runp = thisp->l_initfini; 380 | - if (runp != NULL) 381 | - while (*runp != NULL) 382 | - if (__glibc_unlikely (*runp++ == maps[k])) 383 | - goto ignore; 384 | - goto move; 385 | - } 386 | - ignore:; 387 | - } 388 | - 389 | - --k; 390 | + struct link_map *dep = map->l_reldeps->list[m]; 391 | + if (dep->l_visited == 0 392 | + && dep->l_main_map == 0) 393 | + dfs_traversal (rpo, dep, do_reldeps); 394 | } 395 | + } 396 | + 397 | + *rpo -= 1; 398 | + **rpo = map; 399 | +} 400 | 401 | - skip: 402 | - if (++i == nmaps) 403 | - break; 404 | - next_clear: 405 | - memset (&seen[i], 0, (nmaps - i) * sizeof (seen[0])); 406 | +/* Topologically sort array MAPS according to dependencies of the contained 407 | + objects. 
*/ 408 | 409 | - next:; 410 | +static void 411 | +_dl_sort_maps_dfs (struct link_map **maps, unsigned int nmaps, 412 | + unsigned int skip __attribute__ ((unused)), bool for_fini) 413 | +{ 414 | + for (int i = nmaps - 1; i >= 0; i--) 415 | + maps[i]->l_visited = 0; 416 | + 417 | + /* We apply DFS traversal for each of maps[i] until the whole total order 418 | + is found and we're at the start of the Reverse-Postorder (RPO) sequence, 419 | + which is a topological sort. 420 | + 421 | + We go from maps[nmaps - 1] backwards towards maps[0] at this level. 422 | + Due to the breadth-first search (BFS) ordering we receive, going 423 | + backwards usually gives a more shallow depth-first recursion depth, 424 | + adding more stack usage safety. Also, combined with the natural 425 | + processing order of l_initfini[] at each node during DFS, this maintains 426 | + an ordering closer to the original link ordering in the sorting results 427 | + under most simpler cases. 428 | + 429 | + Another reason we order the top level backwards, it that maps[0] is 430 | + usually exactly the main object of which we're in the midst of 431 | + _dl_map_object_deps() processing, and maps[0]->l_initfini[] is still 432 | + blank. If we start the traversal from maps[0], since having no 433 | + dependencies yet filled in, maps[0] will always be immediately 434 | + incorrectly placed at the last place in the order (first in reverse). 435 | + Adjusting the order so that maps[0] is last traversed naturally avoids 436 | + this problem. 437 | + 438 | + Further, the old "optimization" of skipping the main object at maps[0] 439 | + from the call-site (i.e. _dl_sort_maps(maps+1,nmaps-1)) is in general 440 | + no longer valid, since traversing along object dependency-links 441 | + may "find" the main object even when it is not included in the initial 442 | + order (e.g. a dlopen()'ed shared object can have circular dependencies 443 | + linked back to itself). 
In such a case, traversing N-1 objects will 444 | + create a N-object result, and raise problems. 445 | + 446 | + To summarize, just passing in the full list, and iterating from back 447 | + to front makes things much more straightforward. */ 448 | + 449 | + /* Array to hold RPO sorting results, before we copy back to maps[]. */ 450 | + struct link_map *rpo[nmaps]; 451 | + 452 | + /* The 'head' position during each DFS iteration. Note that we start at 453 | + one past the last element due to first-decrement-then-store (see the 454 | + bottom of above dfs_traversal() routine). */ 455 | + struct link_map **rpo_head = &rpo[nmaps]; 456 | + 457 | + bool do_reldeps = false; 458 | + bool *do_reldeps_ref = (for_fini ? &do_reldeps : NULL); 459 | + 460 | + for (int i = nmaps - 1; i >= 0; i--) 461 | + { 462 | + dfs_traversal (&rpo_head, maps[i], do_reldeps_ref); 463 | + 464 | + /* We can break early if all objects are already placed. */ 465 | + if (rpo_head == rpo) 466 | + goto end; 467 | + } 468 | + assert (rpo_head == rpo); 469 | + 470 | + end: 471 | + /* Here we may do a second pass of sorting, using only l_initfini[] 472 | + static dependency links. This is avoided if !FOR_FINI or if we didn't 473 | + find any reldeps in the first DFS traversal. 474 | + 475 | + The reason we do this is: while it is unspecified how circular 476 | + dependencies should be handled, the presumed reasonable behavior is to 477 | + have destructors to respect static dependency links as much as possible, 478 | + overriding reldeps if needed. And the first sorting pass, which takes 479 | + l_initfini/l_reldeps links equally, may not preserve this priority. 480 | + 481 | + Hence we do a 2nd sorting pass, taking only DT_NEEDED links into account 482 | + (see how the do_reldeps argument to dfs_traversal() is NULL below). 
*/ 483 | + if (do_reldeps) 484 | + { 485 | + for (int i = nmaps - 1; i >= 0; i--) 486 | + rpo[i]->l_visited = 0; 487 | + 488 | + struct link_map **maps_head = &maps[nmaps]; 489 | + for (int i = nmaps - 1; i >= 0; i--) 490 | + { 491 | + dfs_traversal (&maps_head, rpo[i], NULL); 492 | + 493 | + /* We can break early if all objects are already placed. 494 | + The below memcpy is not needed in the do_reldeps case here, 495 | + since we wrote back to maps[] during DFS traversal. */ 496 | + if (maps_head == maps) 497 | + return; 498 | + } 499 | + assert (maps_head == maps); 500 | + return; 501 | } 502 | + 503 | + memcpy (maps, rpo, sizeof (struct link_map *) * nmaps); 504 | +} 505 | + 506 | +void 507 | +_dl_sort_maps (struct link_map **maps, unsigned int nmaps, 508 | + unsigned int skip, bool for_fini) 509 | +{ 510 | + /* It can be tempting to use a static function pointer to store and call 511 | + the current selected sorting algorithm routine, but experimentation 512 | + shows that current processors still do not handle indirect branches 513 | + that efficiently, plus a static function pointer will involve 514 | + PTR_MANGLE/DEMANGLE, further impairing performance of small, common 515 | + input cases. A simple if-case with direct function calls appears to 516 | + be the fastest. */ 517 | + _dl_sort_maps_dfs (maps, nmaps, skip, for_fini); 518 | } 519 | diff --git a/elf/rtld.c b/elf/rtld.c 520 | index e0752eb..e1d056b 100644 521 | --- a/elf/rtld.c 522 | +++ b/elf/rtld.c 523 | @@ -1340,6 +1340,9 @@ of this helper program; chances are you did not intend to run this program.\n\ 524 | main_map->l_name = (char *) ""; 525 | *user_entry = main_map->l_entry; 526 | 527 | + /* Set bit indicating this is the main program map. */ 528 | + main_map->l_main_map = 1; 529 | + 530 | #ifdef HAVE_AUX_VECTOR 531 | /* Adjust the on-stack auxiliary vector so that it looks like the 532 | binary was executed directly. 
*/ 533 | diff --git a/include/link.h b/include/link.h 534 | index aea2684..3a87694 100644 535 | --- a/include/link.h 536 | +++ b/include/link.h 537 | @@ -177,6 +177,11 @@ struct link_map 538 | unsigned int l_init_called:1; /* Nonzero if DT_INIT function called. */ 539 | unsigned int l_global:1; /* Nonzero if object in _dl_global_scope. */ 540 | unsigned int l_reserved:2; /* Reserved for internal use. */ 541 | + unsigned int l_main_map:1; /* Nonzero for the map of the main program. */ 542 | + unsigned int l_visited:1; /* Used internally for map dependency 543 | + graph traversal. */ 544 | + unsigned int l_map_used:1; /* These two bits are used during traversal */ 545 | + unsigned int l_map_done:1; /* of maps in _dl_close_worker. */ 546 | unsigned int l_phdr_allocated:1; /* Nonzero if the data structure pointed 547 | to by `l_phdr' is allocated. */ 548 | unsigned int l_soname_added:1; /* Nonzero if the SONAME is for sure in 549 | diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h 550 | index d7e1515..5bed748 100644 551 | --- a/sysdeps/generic/ldsodefs.h 552 | +++ b/sysdeps/generic/ldsodefs.h 553 | @@ -1010,7 +1010,7 @@ extern void _dl_fini (void) attribute_hidden; 554 | 555 | /* Sort array MAPS according to dependencies of the contained objects. */ 556 | extern void _dl_sort_maps (struct link_map **maps, unsigned int nmaps, 557 | - char *used, bool for_fini) attribute_hidden; 558 | + unsigned int skip, bool for_fini) attribute_hidden; 559 | 560 | /* The dynamic linker calls this function before and having changing 561 | any shared object mappings. 
The `r_state' member of `struct r_debug' 562 | -------------------------------------------------------------------------------- /patches/rhel/2.28/unsubmitted-mathworks-0-bz17645.v2.28-rhel.patch: -------------------------------------------------------------------------------- 1 | **************************ORIGINAL HEADER***************************** 2 | From patchwork Thu Oct 21 13:41:22 2021 3 | Content-Type: text/plain; charset="utf-8" 4 | MIME-Version: 1.0 5 | Content-Transfer-Encoding: 7bit 6 | X-Patchwork-Submitter: Chung-Lin Tang 7 | X-Patchwork-Id: 46497 8 | Return-Path: 9 | X-Original-To: patchwork@sourceware.org 10 | Delivered-To: patchwork@sourceware.org 11 | Received: from server2.sourceware.org (localhost [IPv6:::1]) 12 | by sourceware.org (Postfix) with ESMTP id B66BB3857801 13 | for ; Thu, 21 Oct 2021 13:41:58 +0000 (GMT) 14 | X-Original-To: libc-alpha@sourceware.org 15 | Delivered-To: libc-alpha@sourceware.org 16 | Received: from seed.net.tw (sn15.seed.net.tw [139.175.54.15]) 17 | by sourceware.org (Postfix) with ESMTP id 3B6AC3858405 18 | for ; Thu, 21 Oct 2021 13:41:41 +0000 (GMT) 19 | DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 3B6AC3858405 20 | Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) 21 | header.from=codesourcery.com 22 | Authentication-Results: sourceware.org; 23 | spf=none smtp.mailfrom=codesourcery.com 24 | Received: from [112.104.15.59] (port=35230 helo=localhost.localdomain) 25 | by seed.net.tw with esmtp (Seednet 4.69:2) 26 | id 1mdYK9-000NfF-1W; Thu, 21 Oct 2021 21:41:37 +0800 27 | From: Chung-Lin Tang 28 | To: libc-alpha@sourceware.org, 29 | Adhemerval Zanella 30 | Subject: [PATCH v8 2/2] elf: Fix slow DSO sorting behavior in dynamic loader 31 | (BZ #17645) 32 | Date: Thu, 21 Oct 2021 21:41:22 +0800 33 | Message-Id: <20211021134122.3141-2-cltang@codesourcery.com> 34 | X-Mailer: git-send-email 2.17.1 35 | In-Reply-To: <20211021134122.3141-1-cltang@codesourcery.com> 36 | References: 
<20211021134122.3141-1-cltang@codesourcery.com> 37 | X-Spam-Status: No, score=-19.6 required=5.0 tests=BAYES_00, FORGED_SPF_HELO, 38 | GIT_PATCH_0, KAM_DMARC_STATUS, KAM_LAZY_DOMAIN_SECURITY, KAM_SHORT, 39 | RCVD_IN_DNSWL_LOW, SPF_HELO_PASS, SPF_NONE, 40 | TXREP autolearn=ham autolearn_force=no version=3.4.4 41 | X-Spam-Checker-Version: SpamAssassin 3.4.4 (2020-01-24) on 42 | server2.sourceware.org 43 | X-BeenThere: libc-alpha@sourceware.org 44 | X-Mailman-Version: 2.1.29 45 | Precedence: list 46 | List-Id: Libc-alpha mailing list 47 | List-Unsubscribe: , 48 | 49 | List-Archive: 50 | List-Post: 51 | List-Help: 52 | List-Subscribe: , 53 | 54 | Errors-To: libc-alpha-bounces+patchwork=sourceware.org@sourceware.org 55 | Sender: "Libc-alpha" 56 | 57 | 58 | This second patch contains the actual implementation of a new sorting algorithm 59 | for shared objects in the dynamic loader, which solves the slow behavior that 60 | the current "old" algorithm falls into when the DSO set contains circular 61 | dependencies. 62 | 63 | The new algorithm implemented here is simply depth-first search (DFS) to obtain 64 | the Reverse-Post Order (RPO) sequence, a topological sort. A new l_visited:1 65 | bitfield is added to struct link_map to more elegantly facilitate such a search. 66 | 67 | The DFS algorithm is applied to the input maps[nmap-1] backwards towards 68 | maps[0]. This has the effect of a more "shallow" recursion depth in general 69 | since the input is in BFS. Also, when combined with the natural order of 70 | processing l_initfini[] at each node, this creates a resulting output sorting 71 | closer to the intuitive "left-to-right" order in most cases. 72 | 73 | Another notable implementation adjustment related to this _dl_sort_maps change 74 | is the removing of two char arrays 'used' and 'done' in _dl_close_worker to 75 | represent two per-map attributes. This has been changed to simply use two new 76 | bit-fields l_map_used:1, l_map_done:1 added to struct link_map. 
This also allows 77 | discarding the clunky 'used' array sorting that _dl_sort_maps had to sometimes 78 | do along the way. 79 | 80 | Tunable support for switching between different sorting algorithms at runtime is 81 | also added. A new tunable 'glibc.rtld.dynamic_sort' with current valid values 1 82 | (old algorithm) and 2 (new DFS algorithm) has been added. At time of commit 83 | of this patch, the default setting is 1 (old algorithm). 84 | 85 | Signed-off-by: Chung-Lin Tang 86 | Reviewed-by: Adhemerval Zanella 87 | ********************************************************************************** 88 | 89 | Mathworks 90 | Backport to glibc 2.28 for RHEL, remove tunable support and set default to the DFS sort map behavior 91 | 92 | Coded-by: Alan Li 93 | 94 | 95 | diff --git a/elf/dl-close.c b/elf/dl-close.c 96 | index 73b2817..cfad816 100644 97 | --- a/elf/dl-close.c 98 | +++ b/elf/dl-close.c 99 | @@ -164,8 +164,6 @@ _dl_close_worker (struct link_map *map, bool force) 100 | 101 | bool any_tls = false; 102 | const unsigned int nloaded = ns->_ns_nloaded; 103 | - char used[nloaded]; 104 | - char done[nloaded]; 105 | struct link_map *maps[nloaded]; 106 | 107 | /* Run over the list and assign indexes to the link maps and enter 108 | @@ -173,24 +171,21 @@ _dl_close_worker (struct link_map *map, bool force) 109 | int idx = 0; 110 | for (struct link_map *l = ns->_ns_loaded; l != NULL; l = l->l_next) 111 | { 112 | - l->l_idx = idx; 113 | + l->l_map_used = 0; 114 | + l->l_map_done = 0; 115 | + l->l_idx = idx; 116 | maps[idx] = l; 117 | ++idx; 118 | - 119 | } 120 | assert (idx == nloaded); 121 | 122 | - /* Prepare the bitmaps. */ 123 | - memset (used, '\0', sizeof (used)); 124 | - memset (done, '\0', sizeof (done)); 125 | - 126 | /* Keep track of the lowest index link map we have covered already. 
*/ 127 | int done_index = -1; 128 | while (++done_index < nloaded) 129 | { 130 | struct link_map *l = maps[done_index]; 131 | 132 | - if (done[done_index]) 133 | + if (l->l_map_done) 134 | /* Already handled. */ 135 | continue; 136 | 137 | @@ -201,12 +196,12 @@ _dl_close_worker (struct link_map *map, bool force) 138 | /* See CONCURRENCY NOTES in cxa_thread_atexit_impl.c to know why 139 | acquire is sufficient and correct. */ 140 | && atomic_load_acquire (&l->l_tls_dtor_count) == 0 141 | - && !used[done_index]) 142 | + && !l->l_map_used) 143 | continue; 144 | 145 | /* We need this object and we handle it now. */ 146 | - done[done_index] = 1; 147 | - used[done_index] = 1; 148 | + l->l_map_used = 1; 149 | + l->l_map_done = 1; 150 | /* Signal the object is still needed. */ 151 | l->l_idx = IDX_STILL_USED; 152 | 153 | @@ -222,9 +217,9 @@ _dl_close_worker (struct link_map *map, bool force) 154 | { 155 | assert ((*lp)->l_idx >= 0 && (*lp)->l_idx < nloaded); 156 | 157 | - if (!used[(*lp)->l_idx]) 158 | + if (!(*lp)->l_map_used) 159 | { 160 | - used[(*lp)->l_idx] = 1; 161 | + (*lp)->l_map_used = 1; 162 | /* If we marked a new object as used, and we've 163 | already processed it, then we need to go back 164 | and process again from that point forward to 165 | @@ -247,9 +242,9 @@ _dl_close_worker (struct link_map *map, bool force) 166 | { 167 | assert (jmap->l_idx >= 0 && jmap->l_idx < nloaded); 168 | 169 | - if (!used[jmap->l_idx]) 170 | + if (!jmap->l_map_used) 171 | { 172 | - used[jmap->l_idx] = 1; 173 | + jmap->l_map_used = 1; 174 | if (jmap->l_idx - 1 < done_index) 175 | done_index = jmap->l_idx - 1; 176 | } 177 | @@ -259,8 +254,7 @@ _dl_close_worker (struct link_map *map, bool force) 178 | 179 | /* Sort the entries. We can skip looking for the binary itself which is 180 | at the front of the search list for the main namespace. 
*/ 181 | - _dl_sort_maps (maps + (nsid == LM_ID_BASE), nloaded - (nsid == LM_ID_BASE), 182 | - used + (nsid == LM_ID_BASE), true); 183 | + _dl_sort_maps (maps, nloaded, (nsid == LM_ID_BASE), true); 184 | 185 | /* Call all termination functions at once. */ 186 | #ifdef SHARED 187 | @@ -277,7 +271,7 @@ _dl_close_worker (struct link_map *map, bool force) 188 | /* All elements must be in the same namespace. */ 189 | assert (imap->l_ns == nsid); 190 | 191 | - if (!used[i]) 192 | + if (!imap->l_map_used) 193 | { 194 | assert (imap->l_type == lt_loaded && !imap->l_nodelete_active); 195 | 196 | @@ -330,7 +324,7 @@ _dl_close_worker (struct link_map *map, bool force) 197 | if (i < first_loaded) 198 | first_loaded = i; 199 | } 200 | - /* Else used[i]. */ 201 | + /* Else imap->l_map_used. */ 202 | else if (imap->l_type == lt_loaded) 203 | { 204 | struct r_scope_elem *new_list = NULL; 205 | @@ -554,7 +548,7 @@ _dl_close_worker (struct link_map *map, bool force) 206 | for (unsigned int i = first_loaded; i < nloaded; ++i) 207 | { 208 | struct link_map *imap = maps[i]; 209 | - if (!used[i]) 210 | + if (!imap->l_map_used) 211 | { 212 | assert (imap->l_type == lt_loaded); 213 | 214 | diff --git a/elf/dl-deps.c b/elf/dl-deps.c 215 | index 087a49b212..237d9636c5 100644 216 | --- a/elf/dl-deps.c 217 | +++ b/elf/dl-deps.c 218 | @@ -613,10 +613,9 @@ Filters not supported with LD_TRACE_PRELINKING")); 219 | 220 | /* If libc.so.6 is the main map, it participates in the sort, so 221 | that the relocation order is correct regarding libc.so.6. */ 222 | - if (l_initfini[0] == GL (dl_ns)[l_initfini[0]->l_ns].libc_map) 223 | - _dl_sort_maps (l_initfini, nlist, NULL, false); 224 | - else 225 | - _dl_sort_maps (&l_initfini[1], nlist - 1, NULL, false); 226 | + _dl_sort_maps (l_initfini, nlist, 227 | + (l_initfini[0] != GL (dl_ns)[l_initfini[0]->l_ns].libc_map), 228 | + false); 229 | 230 | /* Terminate the list of dependencies. 
*/ 231 | l_initfini[nlist] = NULL; 232 | diff --git a/elf/dl-fini.c b/elf/dl-fini.c 233 | index 226a6f0..afecbd1 100644 234 | --- a/elf/dl-fini.c 235 | +++ b/elf/dl-fini.c 236 | @@ -91,8 +91,7 @@ _dl_fini (void) 237 | /* Now we have to do the sorting. We can skip looking for the 238 | binary itself which is at the front of the search list for 239 | the main namespace. */ 240 | - _dl_sort_maps (maps + (ns == LM_ID_BASE), nmaps - (ns == LM_ID_BASE), 241 | - NULL, true); 242 | + _dl_sort_maps (maps, nmaps, (ns == LM_ID_BASE), true); 243 | 244 | /* We do not rely on the linked list of loaded object anymore 245 | from this point on. We have our own list here (maps). The 246 | diff --git a/elf/dl-sort-maps.c b/elf/dl-sort-maps.c 247 | index b2a01ed..78d8693 100644 248 | --- a/elf/dl-sort-maps.c 249 | +++ b/elf/dl-sort-maps.c 250 | @@ -1,5 +1,5 @@ 251 | /* Sort array of link maps according to dependencies. 252 | - Copyright (C) 2017-2018 Free Software Foundation, Inc. 253 | + Copyright (C) 2017-2022 Free Software Foundation, Inc. 254 | This file is part of the GNU C Library. 255 | 256 | The GNU C Library is free software; you can redistribute it and/or 257 | @@ -14,109 +14,172 @@ 258 | 259 | You should have received a copy of the GNU Lesser General Public 260 | License along with the GNU C Library; if not, see 261 | - . */ 262 | + . */ 263 | 264 | +#include 265 | #include 266 | 267 | 268 | -/* Sort array MAPS according to dependencies of the contained objects. 269 | - Array USED, if non-NULL, is permutated along MAPS. If FOR_FINI this is 270 | - called for finishing an object. */ 271 | -void 272 | -_dl_sort_maps (struct link_map **maps, unsigned int nmaps, char *used, 273 | - bool for_fini) 274 | +/* We use a recursive function due to its better clarity and ease of 275 | + implementation, as well as faster execution speed. 
We already use 276 | + alloca() for list allocation during the breadth-first search of 277 | + dependencies in _dl_map_object_deps(), and this should be on the 278 | + same order of worst-case stack usage. 279 | + 280 | + Note: the '*rpo' parameter is supposed to point to one past the 281 | + last element of the array where we save the sort results, and is 282 | + decremented before storing the current map at each level. */ 283 | + 284 | +static void 285 | +dfs_traversal (struct link_map ***rpo, struct link_map *map, 286 | + bool *do_reldeps) 287 | { 288 | - /* A list of one element need not be sorted. */ 289 | - if (nmaps <= 1) 290 | + /* _dl_map_object_deps ignores l_faked objects when calculating the 291 | + number of maps before calling _dl_sort_maps, ignore them as well. */ 292 | + if (map->l_visited || map->l_faked) 293 | return; 294 | 295 | - unsigned int i = 0; 296 | - uint16_t seen[nmaps]; 297 | - memset (seen, 0, nmaps * sizeof (seen[0])); 298 | - while (1) 299 | - { 300 | - /* Keep track of which object we looked at this round. */ 301 | - ++seen[i]; 302 | - struct link_map *thisp = maps[i]; 303 | + map->l_visited = 1; 304 | 305 | - if (__glibc_unlikely (for_fini)) 306 | + if (map->l_initfini) 307 | + { 308 | + for (int i = 0; map->l_initfini[i] != NULL; i++) 309 | { 310 | - /* Do not handle ld.so in secondary namespaces and objects which 311 | - are not removed. */ 312 | - if (thisp != thisp->l_real || thisp->l_idx == -1) 313 | - goto skip; 314 | + struct link_map *dep = map->l_initfini[i]; 315 | + if (dep->l_visited == 0 316 | + && dep->l_main_map == 0) 317 | + dfs_traversal (rpo, dep, do_reldeps); 318 | } 319 | + } 320 | 321 | - /* Find the last object in the list for which the current one is 322 | - a dependency and move the current object behind the object 323 | - with the dependency. 
*/ 324 | - unsigned int k = nmaps - 1; 325 | - while (k > i) 326 | + if (__glibc_unlikely (do_reldeps != NULL && map->l_reldeps != NULL)) 327 | + { 328 | + /* Indicate that we encountered relocation dependencies during 329 | + traversal. */ 330 | + *do_reldeps = true; 331 | + 332 | + for (int m = map->l_reldeps->act - 1; m >= 0; m--) 333 | { 334 | - struct link_map **runp = maps[k]->l_initfini; 335 | - if (runp != NULL) 336 | - /* Look through the dependencies of the object. */ 337 | - while (*runp != NULL) 338 | - if (__glibc_unlikely (*runp++ == thisp)) 339 | - { 340 | - move: 341 | - /* Move the current object to the back past the last 342 | - object with it as the dependency. */ 343 | - memmove (&maps[i], &maps[i + 1], 344 | - (k - i) * sizeof (maps[0])); 345 | - maps[k] = thisp; 346 | - 347 | - if (used != NULL) 348 | - { 349 | - char here_used = used[i]; 350 | - memmove (&used[i], &used[i + 1], 351 | - (k - i) * sizeof (used[0])); 352 | - used[k] = here_used; 353 | - } 354 | - 355 | - if (seen[i + 1] > nmaps - i) 356 | - { 357 | - ++i; 358 | - goto next_clear; 359 | - } 360 | - 361 | - uint16_t this_seen = seen[i]; 362 | - memmove (&seen[i], &seen[i + 1], (k - i) * sizeof (seen[0])); 363 | - seen[k] = this_seen; 364 | - 365 | - goto next; 366 | - } 367 | - 368 | - if (__glibc_unlikely (for_fini && maps[k]->l_reldeps != NULL)) 369 | - { 370 | - unsigned int m = maps[k]->l_reldeps->act; 371 | - struct link_map **relmaps = &maps[k]->l_reldeps->list[0]; 372 | - 373 | - /* Look through the relocation dependencies of the object. */ 374 | - while (m-- > 0) 375 | - if (__glibc_unlikely (relmaps[m] == thisp)) 376 | - { 377 | - /* If a cycle exists with a link time dependency, 378 | - preserve the latter. 
*/ 379 | - struct link_map **runp = thisp->l_initfini; 380 | - if (runp != NULL) 381 | - while (*runp != NULL) 382 | - if (__glibc_unlikely (*runp++ == maps[k])) 383 | - goto ignore; 384 | - goto move; 385 | - } 386 | - ignore:; 387 | - } 388 | - 389 | - --k; 390 | + struct link_map *dep = map->l_reldeps->list[m]; 391 | + if (dep->l_visited == 0 392 | + && dep->l_main_map == 0) 393 | + dfs_traversal (rpo, dep, do_reldeps); 394 | } 395 | + } 396 | + 397 | + *rpo -= 1; 398 | + **rpo = map; 399 | +} 400 | 401 | - skip: 402 | - if (++i == nmaps) 403 | - break; 404 | - next_clear: 405 | - memset (&seen[i], 0, (nmaps - i) * sizeof (seen[0])); 406 | +/* Topologically sort array MAPS according to dependencies of the contained 407 | + objects. */ 408 | 409 | - next:; 410 | +static void 411 | +_dl_sort_maps_dfs (struct link_map **maps, unsigned int nmaps, 412 | + unsigned int skip __attribute__ ((unused)), bool for_fini) 413 | +{ 414 | + for (int i = nmaps - 1; i >= 0; i--) 415 | + maps[i]->l_visited = 0; 416 | + 417 | + /* We apply DFS traversal for each of maps[i] until the whole total order 418 | + is found and we're at the start of the Reverse-Postorder (RPO) sequence, 419 | + which is a topological sort. 420 | + 421 | + We go from maps[nmaps - 1] backwards towards maps[0] at this level. 422 | + Due to the breadth-first search (BFS) ordering we receive, going 423 | + backwards usually gives a more shallow depth-first recursion depth, 424 | + adding more stack usage safety. Also, combined with the natural 425 | + processing order of l_initfini[] at each node during DFS, this maintains 426 | + an ordering closer to the original link ordering in the sorting results 427 | + under most simpler cases. 428 | + 429 | + Another reason we order the top level backwards, it that maps[0] is 430 | + usually exactly the main object of which we're in the midst of 431 | + _dl_map_object_deps() processing, and maps[0]->l_initfini[] is still 432 | + blank. 
If we start the traversal from maps[0], since having no 433 | + dependencies yet filled in, maps[0] will always be immediately 434 | + incorrectly placed at the last place in the order (first in reverse). 435 | + Adjusting the order so that maps[0] is last traversed naturally avoids 436 | + this problem. 437 | + 438 | + Further, the old "optimization" of skipping the main object at maps[0] 439 | + from the call-site (i.e. _dl_sort_maps(maps+1,nmaps-1)) is in general 440 | + no longer valid, since traversing along object dependency-links 441 | + may "find" the main object even when it is not included in the initial 442 | + order (e.g. a dlopen()'ed shared object can have circular dependencies 443 | + linked back to itself). In such a case, traversing N-1 objects will 444 | + create a N-object result, and raise problems. 445 | + 446 | + To summarize, just passing in the full list, and iterating from back 447 | + to front makes things much more straightforward. */ 448 | + 449 | + /* Array to hold RPO sorting results, before we copy back to maps[]. */ 450 | + struct link_map *rpo[nmaps]; 451 | + 452 | + /* The 'head' position during each DFS iteration. Note that we start at 453 | + one past the last element due to first-decrement-then-store (see the 454 | + bottom of above dfs_traversal() routine). */ 455 | + struct link_map **rpo_head = &rpo[nmaps]; 456 | + 457 | + bool do_reldeps = false; 458 | + bool *do_reldeps_ref = (for_fini ? &do_reldeps : NULL); 459 | + 460 | + for (int i = nmaps - 1; i >= 0; i--) 461 | + { 462 | + dfs_traversal (&rpo_head, maps[i], do_reldeps_ref); 463 | + 464 | + /* We can break early if all objects are already placed. */ 465 | + if (rpo_head == rpo) 466 | + goto end; 467 | + } 468 | + assert (rpo_head == rpo); 469 | + 470 | + end: 471 | + /* Here we may do a second pass of sorting, using only l_initfini[] 472 | + static dependency links. This is avoided if !FOR_FINI or if we didn't 473 | + find any reldeps in the first DFS traversal. 
474 | + 475 | + The reason we do this is: while it is unspecified how circular 476 | + dependencies should be handled, the presumed reasonable behavior is to 477 | + have destructors to respect static dependency links as much as possible, 478 | + overriding reldeps if needed. And the first sorting pass, which takes 479 | + l_initfini/l_reldeps links equally, may not preserve this priority. 480 | + 481 | + Hence we do a 2nd sorting pass, taking only DT_NEEDED links into account 482 | + (see how the do_reldeps argument to dfs_traversal() is NULL below). */ 483 | + if (do_reldeps) 484 | + { 485 | + for (int i = nmaps - 1; i >= 0; i--) 486 | + rpo[i]->l_visited = 0; 487 | + 488 | + struct link_map **maps_head = &maps[nmaps]; 489 | + for (int i = nmaps - 1; i >= 0; i--) 490 | + { 491 | + dfs_traversal (&maps_head, rpo[i], NULL); 492 | + 493 | + /* We can break early if all objects are already placed. 494 | + The below memcpy is not needed in the do_reldeps case here, 495 | + since we wrote back to maps[] during DFS traversal. */ 496 | + if (maps_head == maps) 497 | + return; 498 | + } 499 | + assert (maps_head == maps); 500 | + return; 501 | } 502 | + 503 | + memcpy (maps, rpo, sizeof (struct link_map *) * nmaps); 504 | +} 505 | + 506 | +void 507 | +_dl_sort_maps (struct link_map **maps, unsigned int nmaps, 508 | + unsigned int skip, bool for_fini) 509 | +{ 510 | + /* It can be tempting to use a static function pointer to store and call 511 | + the current selected sorting algorithm routine, but experimentation 512 | + shows that current processors still do not handle indirect branches 513 | + that efficiently, plus a static function pointer will involve 514 | + PTR_MANGLE/DEMANGLE, further impairing performance of small, common 515 | + input cases. A simple if-case with direct function calls appears to 516 | + be the fastest. 
*/ 517 | + _dl_sort_maps_dfs (maps, nmaps, skip, for_fini); 518 | } 519 | diff --git a/elf/rtld.c b/elf/rtld.c 520 | index e0752eb..e1d056b 100644 521 | --- a/elf/rtld.c 522 | +++ b/elf/rtld.c 523 | @@ -1340,6 +1340,9 @@ of this helper program; chances are you did not intend to run this program.\n\ 524 | main_map->l_name = (char *) ""; 525 | *user_entry = main_map->l_entry; 526 | 527 | + /* Set bit indicating this is the main program map. */ 528 | + main_map->l_main_map = 1; 529 | + 530 | #ifdef HAVE_AUX_VECTOR 531 | /* Adjust the on-stack auxiliary vector so that it looks like the 532 | binary was executed directly. */ 533 | diff --git a/include/link.h b/include/link.h 534 | index aea2684..3a87694 100644 535 | --- a/include/link.h 536 | +++ b/include/link.h 537 | @@ -177,6 +177,11 @@ struct link_map 538 | unsigned int l_init_called:1; /* Nonzero if DT_INIT function called. */ 539 | unsigned int l_global:1; /* Nonzero if object in _dl_global_scope. */ 540 | unsigned int l_reserved:2; /* Reserved for internal use. */ 541 | + unsigned int l_main_map:1; /* Nonzero for the map of the main program. */ 542 | + unsigned int l_visited:1; /* Used internally for map dependency 543 | + graph traversal. */ 544 | + unsigned int l_map_used:1; /* These two bits are used during traversal */ 545 | + unsigned int l_map_done:1; /* of maps in _dl_close_worker. */ 546 | unsigned int l_phdr_allocated:1; /* Nonzero if the data structure pointed 547 | to by `l_phdr' is allocated. */ 548 | unsigned int l_soname_added:1; /* Nonzero if the SONAME is for sure in 549 | diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h 550 | index d7e1515..5bed748 100644 551 | --- a/sysdeps/generic/ldsodefs.h 552 | +++ b/sysdeps/generic/ldsodefs.h 553 | @@ -1010,7 +1010,7 @@ extern void _dl_fini (void) attribute_hidden; 554 | 555 | /* Sort array MAPS according to dependencies of the contained objects. 
*/ 556 | extern void _dl_sort_maps (struct link_map **maps, unsigned int nmaps, 557 | - char *used, bool for_fini) attribute_hidden; 558 | + unsigned int skip, bool for_fini) attribute_hidden; 559 | 560 | /* The dynamic linker calls this function before and having changing 561 | any shared object mappings. The `r_state' member of `struct r_debug' 562 | -------------------------------------------------------------------------------- /patches/rhel/2.28/unsubmitted-mathworks-glibc-bz19329-1-of-2.el8.patch: -------------------------------------------------------------------------------- 1 | The content of this file is derived from original patches published to libc-alpha at sourceware dot org 2 | by Szabolcs Nagy. The original, unmodified, version is available in the patches/debian/2.27 folder of 3 | this repository or from https://sourceware.org/legacy-ml/libc-alpha/2016-11/msg01092.html 4 | 5 | The original content has been adapted by MathWorks to enable patching glibc 2.28 since there were 6 | source code changes in glibc since the original patches were constructed 7 | 8 | NOTE: ADAPTED TO PATCH v2.28 via rpmbuild 9 | 10 | From d66a4500aaf93d7a59b1ea668a60ff17c84d8d26 Mon Sep 17 00:00:00 2001 11 | From: Szabolcs Nagy 12 | Date: Wed, 30 Nov 2016 11:44:25 +0000 13 | Subject: [PATCH 1/3] remove broken code path for easier code review 14 | 15 | This patch is not necessary for the bug fix, just makes concurrency 16 | code review easier (removes a data race and overflow from a broken 17 | code path). 18 | 19 | dlopen can oom crash anyway in _dl_resize_dtv and it's probably 20 | better to crash than leave half setup modules around. 21 | 22 | 2016-11-30 Szabolcs Nagy 23 | 24 | * elf/dl-tls.c (_dl_add_to_slotinfo): OOM crash. 
25 | --- 26 | elf/dl-tls.c | 16 ++++------------ 27 | 1 file changed, 4 insertions(+), 12 deletions(-) 28 | 29 | diff --git a/elf/dl-tls.c b/elf/dl-tls.c 30 | index cccf74b3..14fe7c6f 100644 31 | --- a/elf/dl-tls.c 32 | +++ b/elf/dl-tls.c 33 | @@ -998,18 +998,10 @@ _dl_add_to_slotinfo (struct link_map *l, bool do_add) 34 | + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); 35 | if (listp == NULL) 36 | { 37 | - /* We ran out of memory. We will simply fail this 38 | - call but don't undo anything we did so far. The 39 | - application will crash or be terminated anyway very 40 | - soon. */ 41 | - 42 | - /* We have to do this since some entries in the dtv 43 | - slotinfo array might already point to this 44 | - generation. */ 45 | - ++GL(dl_tls_generation); 46 | - 47 | - _dl_signal_error (ENOMEM, "dlopen", NULL, N_("\ 48 | -cannot create TLS data structures")); 49 | + /* We ran out of memory in dlopen while updating tls structures. 50 | + TODO: side-effects should be rolled back and the failure should 51 | + be reported to the caller, but that's hard. */ 52 | + oom (); 53 | } 54 | 55 | listp->len = TLS_SLOTINFO_SURPLUS; 56 | -- 57 | 2.27.0 58 | 59 | -------------------------------------------------------------------------------- /patches/rhel/2.28/unsubmitted-mathworks-glibc-bz19329-2-of-2.el8.patch: -------------------------------------------------------------------------------- 1 | The content of this file is derived from original patches published to libc-alpha at sourceware dot org 2 | by Szabolcs Nagy. 
The original, unmodified, version is available in the patches/debian/2.27 folder of 3 | this repository or from https://sourceware.org/legacy-ml/libc-alpha/2016-11/msg01093.html 4 | 5 | The original content has been adapted by MathWorks to enable patching glibc 2.28 since there were 6 | source code changes in glibc since the original patches were constructed 7 | 8 | NOTE: ADAPTED TO PATCH v2.28 via rpmbuild 9 | 10 | From c19efcb26d7e73e4baa16f95dc3bd0c18288ee60 Mon Sep 17 00:00:00 2001 11 | From: Szabolcs Nagy 12 | Date: Wed, 30 Nov 2016 11:44:32 +0000 13 | Subject: [PATCH 2/3] Fix data races between pthread_create and dlopen 14 | 15 | This fixes a subset of the issues described in 16 | https://sourceware.org/ml/libc-alpha/2016-11/msg01026.html 17 | without adding locks to pthread_create. 18 | 19 | Only races between dlopen and pthread_create were considered, 20 | and the asserts got removed that tried to check for concurrency 21 | issues. 22 | 23 | The patch is incomplete because dlclose, tls access and 24 | dl_iterate_phdr related code paths are not modified. 25 | 26 | dlclose should be updated in a similar fashion to dlopen 27 | to make the patch complete alternatively pthread_create 28 | may take the GL(dl_load_write_lock) to sync with dlclose 29 | or the GL(dl_load_lock) to sync with dlopen and dlclose 30 | (that would simplify the concurrency design, but increase 31 | lock contention on the locks). 32 | --- 33 | elf/dl-open.c | 7 ++++- 34 | elf/dl-tls.c | 86 ++++++++++++++++++++++++++++++++++++++++++++------- 35 | 2 files changed, 81 insertions(+), 12 deletions(-) 36 | 37 | diff --git a/elf/dl-open.c b/elf/dl-open.c 38 | index 3d49a845..fbdd10e8 100644 39 | --- a/elf/dl-open.c 40 | +++ b/elf/dl-open.c 41 | @@ -395,9 +395,14 @@ update_tls_slotinfo (struct link_map *new) 42 | } 43 | } 44 | 45 | - if (__builtin_expect (++GL(dl_tls_generation) == 0, 0)) 46 | + /* This cannot be in a data-race so non-atomic load is valid too. 
*/ 47 | + size_t newgen = atomic_load_relaxed (&GL(dl_tls_generation)) + 1; 48 | + /* Synchronize with _dl_allocate_tls_init (see notes there) and 49 | + avoid storing an overflowed counter. */ 50 | + if (__builtin_expect (newgen == 0, 0)) 51 | _dl_fatal_printf (N_("\ 52 | TLS generation counter wrapped! Please report this.")); 53 | + atomic_store_release (&GL(dl_tls_generation), newgen); 54 | 55 | /* We need a second pass for static tls data, because 56 | _dl_update_slotinfo must not be run while calls to 57 | diff --git a/elf/dl-tls.c b/elf/dl-tls.c 58 | index 14fe7c6f..d1866af1 100644 59 | --- a/elf/dl-tls.c 60 | +++ b/elf/dl-tls.c 61 | @@ -512,6 +512,36 @@ _dl_resize_dtv (dtv_t *dtv) 62 | } 63 | 64 | 65 | +/* 66 | +CONCURRENCY NOTES 67 | + 68 | +dlopen (and dlclose) holds the GL(dl_load_lock) while writing shared state, 69 | +which may be concurrently read by pthread_create and tls access without taking 70 | +the lock, so atomic access should be used. The shared state: 71 | + 72 | + GL(dl_tls_max_dtv_idx) - max modid assigned, (modid can be reused). 73 | + GL(dl_tls_generation) - generation count, incremented by dlopen and dlclose. 74 | + GL(dl_tls_dtv_slotinfo_list) - list of entries, contains generation count 75 | + and link_map for each module with a modid. 76 | + 77 | +A module gets a modid assigned if it has tls, a modid identifies a slotinfo 78 | +entry and it is the index of the corresponding dtv slot. The generation count 79 | +is assigned to slotinfo entries of a newly loaded or unloaded module and its 80 | +newly loaded or unloaded dependencies. 81 | + 82 | +TODO: dlclose may free memory read by a concurrent pthread_create or tls 83 | +access. This is broken now, so it is assumed that dlclose does not free 84 | +link_map structures while pthread_create or __tls_get_addr is reading them. 
85 | + 86 | +pthread_create calls _dl_allocate_tls_init (before creating the new thread), 87 | +which should guarantee that the dtv is in a consistent state at the end: 88 | + 89 | +All slotinfo updates with generation <= dtv[0].counter are reflected in the 90 | +dtv and arbitrary later module unloads may also be reflected as unallocated 91 | +entries. (Note: a modid reuse implies a module unload and accessing tls in 92 | +an unloaded module is undefined.) 93 | +*/ 94 | + 95 | void * 96 | _dl_allocate_tls_init (void *result) 97 | { 98 | @@ -523,12 +553,24 @@ _dl_allocate_tls_init (void *result) 99 | struct dtv_slotinfo_list *listp; 100 | size_t total = 0; 101 | size_t maxgen = 0; 102 | + /* Synchronizes with the increments in dl_{open,close}_worker. 103 | + Slotinfo updates of this generation are sequenced before the 104 | + write we read from here. */ 105 | + size_t gen_count = atomic_load_acquire (&GL(dl_tls_generation)); 106 | + /* Either reads from the last write that is sequenced before the 107 | + generation counter increment we synchronized with or a write 108 | + made by a later dlopen/dlclose. dlclose may decrement this, 109 | + but only if related modules are unloaded. So it is an upper 110 | + bound on non-unloaded modids up to gen_count generation. */ 111 | + size_t dtv_slots = atomic_load_relaxed (&GL(dl_tls_max_dtv_idx)); 112 | 113 | /* Check if the current dtv is big enough. */ 114 | - if (dtv[-1].counter < GL(dl_tls_max_dtv_idx)) 115 | + if (dtv[-1].counter < dtv_slots) 116 | { 117 | /* Resize the dtv. */ 118 | dtv = _dl_resize_dtv (dtv); 119 | + /* _dl_resize_dtv rereads GL(dl_tls_max_dtv_idx) which may decrease. */ 120 | + dtv_slots = dtv[-1].counter; 121 | 122 | /* Install this new dtv in the thread data structures. */ 123 | INSTALL_DTV (result, &dtv[-1]); 124 | @@ -545,22 +587,33 @@ _dl_allocate_tls_init (void *result) 125 | for (cnt = total == 0 ? 
1 : 0; cnt < listp->len; ++cnt) 126 | { 127 | struct link_map *map; 128 | + size_t gen; 129 | void *dest; 130 | 131 | /* Check for the total number of used slots. */ 132 | - if (total + cnt > GL(dl_tls_max_dtv_idx)) 133 | + if (total + cnt > dtv_slots) 134 | break; 135 | 136 | - map = listp->slotinfo[cnt].map; 137 | + /* Synchronize with dl_add_to_slotinfo and remove_slotinfo. */ 138 | + map = atomic_load_acquire (&listp->slotinfo[cnt].map); 139 | if (map == NULL) 140 | /* Unused entry. */ 141 | continue; 142 | 143 | + /* Consistent generation count with the map read above. 144 | + Inconsistent gen may be read if the entry is being reused, 145 | + in which case it is larger than gen_count and we skip it. */ 146 | + gen = atomic_load_relaxed (&listp->slotinfo[cnt].gen); 147 | + if (gen > gen_count) 148 | + /* New entry. */ 149 | + continue; 150 | + 151 | /* Keep track of the maximum generation number. This might 152 | not be the generation counter. */ 153 | - assert (listp->slotinfo[cnt].gen <= GL(dl_tls_generation)); 154 | - maxgen = MAX (maxgen, listp->slotinfo[cnt].gen); 155 | + maxgen = MAX (maxgen, gen); 156 | 157 | + /* TODO: concurrent dlclose may free map which would break 158 | + the rest of the code below. */ 159 | dtv[map->l_tls_modid].pointer.val = TLS_DTV_UNALLOCATED; 160 | dtv[map->l_tls_modid].pointer.to_free = NULL; 161 | 162 | @@ -590,11 +643,15 @@ _dl_allocate_tls_init (void *result) 163 | } 164 | 165 | total += cnt; 166 | - if (total >= GL(dl_tls_max_dtv_idx)) 167 | + if (total >= dtv_slots) 168 | break; 169 | 170 | - listp = listp->next; 171 | - assert (listp != NULL); 172 | + /* Synchronize with dl_add_to_slotinfo. */ 173 | + listp = atomic_load_acquire (&listp->next); 174 | + /* dtv_slots is an upper bound on the number of entries we care 175 | + about, the list may end sooner. */ 176 | + if (listp == NULL) 177 | + break; 178 | } 179 | 180 | /* The DTV version is up-to-date now. 
*/ 181 | @@ -993,7 +1050,7 @@ _dl_add_to_slotinfo (struct link_map *l, bool do_add) 182 | the first slot. */ 183 | assert (idx == 0); 184 | 185 | - listp = prevp->next = (struct dtv_slotinfo_list *) 186 | + listp = (struct dtv_slotinfo_list *) 187 | malloc (sizeof (struct dtv_slotinfo_list) 188 | + TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); 189 | if (listp == NULL) 190 | @@ -1008,12 +1065,19 @@ _dl_add_to_slotinfo (struct link_map *l, bool do_add) 191 | listp->next = NULL; 192 | memset (listp->slotinfo, '\0', 193 | TLS_SLOTINFO_SURPLUS * sizeof (struct dtv_slotinfo)); 194 | + /* Add the new list item and synchronize with _dl_allocate_tls_init. */ 195 | + atomic_store_release (&prevp->next, listp); 196 | } 197 | 198 | /* Add the information into the slotinfo data structure. */ 199 | if (do_add) 200 | { 201 | - listp->slotinfo[idx].map = l; 202 | - listp->slotinfo[idx].gen = GL(dl_tls_generation) + 1; 203 | + /* This cannot be in a data-race so non-atomic load would be valid too. */ 204 | + size_t newgen = atomic_load_relaxed (&GL(dl_tls_generation)) + 1; 205 | + /* TODO: Concurrent readers may see an overflowed gen, which is bad, 206 | + but overflow is guaranteed to crash the dlopen that is executing. */ 207 | + atomic_store_relaxed (&listp->slotinfo[idx].gen, newgen); 208 | + /* Synchronize with _dl_allocate_tls_init (see notes there). */ 209 | + atomic_store_release (&listp->slotinfo[idx].map, l); 210 | } 211 | } 212 | -- 213 | 2.27.0 214 | 215 | -------------------------------------------------------------------------------- /patches/rhel/2.28/unsubmitted-mathworks-glibc-bz19329-fixup.el8.patch: -------------------------------------------------------------------------------- 1 | The content of this file is derived from original patches published to libc-alpha at sourceware dot org 2 | by Szabolcs Nagy. 
The original, unmodified, version is available in the patches/debian/2.27 folder of 3 | this repository or from https://sourceware.org/bugzilla/show_bug.cgi?id=19329#c9 4 | 5 | The original content has been adapted by MathWorks to enable patching glibc 2.28 since there were 6 | source code changes in glibc since the original patches were constructed 7 | 8 | NOTE: ADAPTED TO PATCH v2.28 via rpmbuild 9 | 10 | --- 11 | elf/dl-tls.c | 2 +- 12 | 1 file changed, 1 insertion(+), 1 deletion(-) 13 | 14 | diff --git a/elf/dl-tls.c b/elf/dl-tls.c 15 | index d1866af1..46d1e1b8 100644 16 | --- a/elf/dl-tls.c 17 | +++ b/elf/dl-tls.c 18 | @@ -643,7 +643,7 @@ _dl_allocate_tls_init (void *result) 19 | } 20 | 21 | total += cnt; 22 | - if (total >= dtv_slots) 23 | + if (total > dtv_slots) 24 | break; 25 | 26 | /* Synchronize with dl_add_to_slotinfo. */ 27 | -- 28 | 2.27.0 29 | 30 | -------------------------------------------------------------------------------- /scripts/build-glibc-src.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2021 The MathWorks, Inc. 3 | 4 | source setup-glibc-build-env-vars.sh 5 | 6 | pushd glibc-${VER}/ 7 | 8 | DEBIAN_FRONTEND="noninteractive" TZ="Etc/UTC" apt-get build-dep libc6 -y 9 | env DEB_BUILD_OPTIONS=nocheck dpkg-buildpackage -us -uc -j$(nproc) -------------------------------------------------------------------------------- /scripts/get-glibc-src.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2021 The MathWorks, Inc. 
3 | 4 | source setup-glibc-build-env-vars.sh 5 | 6 | 7 | sed -i "/^deb\s.* ${DIST} main/{p;s/^deb\s/deb-src /}" /etc/apt/sources.list 8 | sed -i "/^deb\s.* ${DIST}-updates main/{p;s/^deb\s/deb-src /}" /etc/apt/sources.list 9 | 10 | apt-get update -y 11 | 12 | apt-get source libc6 13 | 14 | apt-get build-dep libc6 -y 15 | 16 | -------------------------------------------------------------------------------- /scripts/patch-glibc-src.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2021 The MathWorks, Inc. 3 | 4 | source setup-glibc-build-env-vars.sh 5 | 6 | cat << 'EOF' >> ~/.quiltrc 7 | d=. ; while [ ! -d $d/debian -a `readlink -e $d` != / ]; do d=$d/..; done 8 | if [ -d $d/debian ] && [ -z $QUILT_PATCHES ]; then 9 | # if in Debian packaging tree with unset $QUILT_PATCHES 10 | QUILT_PATCHES="debian/patches" 11 | QUILT_PATCH_OPTS="--reject-format=unified" 12 | QUILT_DIFF_OPTS="-p" 13 | QUILT_DIFF_ARGS="-p ab --no-timestamps --no-index --color=auto" 14 | QUILT_REFRESH_ARGS="-p ab --no-timestamps --no-index" 15 | QUILT_COLORS="diff_hdr=1;32:diff_add=1;34:diff_rem=1;31:diff_hunk=1;33:diff_ctx=35:diff_cctx=33" 16 | if ! [ -d $d/debian/patches ]; then mkdir $d/debian/patches; fi 17 | fi 18 | EOF 19 | 20 | PATCH_FOLDER=$(pwd)/patches/${VER} 21 | 22 | pushd glibc-${VER}/debian/patches 23 | cp ${PATCH_FOLDER}/unsubmitted-mathworks-*.patch any/ 24 | echo any/unsubmitted-mathworks-* | tr ' ' '\n' >> series 25 | 26 | for p in `ls any/` 27 | do 28 | echo "push patch $p" 29 | quilt push 30 | quilt refresh 31 | done 32 | 33 | 34 | quilt pop -a 35 | 36 | dch --newversion="${PKGVER}" "patching glibc" -------------------------------------------------------------------------------- /scripts/setup-glibc-build-env-vars.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The MathWorks, Inc. 
2 | 3 | DIST=$(grep -Po "(?<=VERSION_CODENAME=).*" /etc/os-release) 4 | 5 | if [ ${OVERRIDE_DIST_RELEASE} = "true" ] ; then 6 | DIST=${DIST_RELEASE} 7 | fi 8 | 9 | PKGVER=$(dpkg-query --showformat='${Version}' --show libc6).${PKG_EXT-${DIST_BASE}.${DIST}.custom} 10 | VER=$(echo ${PKGVER} | grep -Po "[0-9/.]+(?=-)") 11 | -------------------------------------------------------------------------------- /scripts/update-specfile-al2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2025 The MathWorks, Inc. 3 | 4 | specfile=rpmbuild/SPECS/glibc.spec 5 | patchfile=rpmbuild/SOURCES/glibc.patches 6 | 7 | last_patchnum=$(egrep '^Patch[0-9]+' $patchfile | tail -1 | sed 's/^Patch\([0-9]\+\):.*/\1/') 8 | buildid=$(egrep '%define _buildid\s+' $specfile | tail -1 | sed 's/^%define _buildid\s\+\.//') 9 | new_buildid=$((buildid+1)) 10 | 11 | for p in `ls patches/` 12 | do 13 | echo "push patch $p" 14 | sed -i "/^Patch${last_patchnum}:/a Patch$((last_patchnum+1)): ${p}" $patchfile 15 | last_patchnum=$((last_patchnum+1)) 16 | done 17 | 18 | sed -i "/^Patch${last_patchnum}:/a %global _default_patch_fuzz 2" $patchfile 19 | sed -i "s/^\(%define _buildid\s\+\)\.${buildid}/\1.${new_buildid}/" $specfile -------------------------------------------------------------------------------- /scripts/update-specfile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2021 The MathWorks, Inc. 
3 | 4 | specfile=rpmbuild/SPECS/glibc.spec 5 | 6 | last_patchnum=$(egrep '^Patch[0-9]+' $specfile | tail -1 | sed 's/^Patch\([0-9]\+\):.*/\1/') 7 | 8 | for p in `ls patches/` 9 | do 10 | echo "push patch $p" 11 | sed -i "/^Patch${last_patchnum}:/a Patch$((last_patchnum+1)): ${p}" $specfile 12 | last_patchnum=$((last_patchnum+1)) 13 | done 14 | 15 | sed -i "/^Patch${last_patchnum}:/a %global _default_patch_fuzz 2" $specfile 16 | sed -i 's/^\(%define glibcrelease [0-9]\+\)/\1.custom/' $specfile 17 | --------------------------------------------------------------------------------