Auto merge of #119290 - Kobzol:ci-docker-registry-cache, r=Mark-simulacrum
Cache CI Docker images in ghcr registry This PR changes the way `rust-lang` caches Docker images used in CI workflows. Before, the intermediate Docker layers were manually exported from `docker history` and backed up in S3. However, this approach doesn't work any more with the Docker version used by GitHub Actions since August 2023. We had to revert to disabling Docker BuildKit to make the old caching work, but this workaround will stop working eventually, after GitHub updates Docker again and the old build backend will be removed. This PR changes the caching to use [Docker caching](https://docs.docker.com/build/cache/) instead. There are several backends for the cache, for our use-case S3 and Docker registry makes sense. This PR uses the Docker registry backend and uses the ghcr.io registry. The caching creates a Docker image labeled `rust-ci`, which is currently stored to the `ghcr.io/rust-lang-ci` package registry. This image appears [here](https://ghcr.io/rust-lang-ci/rust-ci). The image is stored in `rust-lang-ci` and not `rust-lang`, because `try` and `auto` builds run in the context of that repository, so the used `GITHUB_TOKEN` has permissions for it (unlike for `rust-lang`). For pull request CI runs, the provided `GITHUB_TOKEN` reduces its permissions automatically to `packages: read`, which means that we won't be able to write the Docker image. If we're not able to write, we won't have anything to read. So I disabled the caching entirely for PR runs (it makes it slightly faster to build the Docker image if we don't have to deal with exporting and using a separate build driver). Note that before this PR, we also weren't able to read or write the cache on PR runs. Rustup part of this change is [here](https://github.com/rust-lang/rustup/pull/3648). Related issue: https://github.com/rust-lang/infra-team/issues/81 r? `@Mark-Simulacrum`
This commit is contained in:
commit
7e43442eb6
4
.github/workflows/ci.yml
vendored
4
.github/workflows/ci.yml
vendored
@ -28,6 +28,7 @@ name: CI
|
||||
- "**"
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
defaults:
|
||||
run:
|
||||
shell: bash
|
||||
@ -42,6 +43,7 @@ jobs:
|
||||
CI_JOB_NAME: "${{ matrix.name }}"
|
||||
CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
|
||||
HEAD_SHA: "${{ github.event.pull_request.head.sha || github.sha }}"
|
||||
DOCKER_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
|
||||
SCCACHE_BUCKET: rust-lang-ci-sccache2
|
||||
TOOLSTATE_REPO: "https://github.com/rust-lang-nursery/rust-toolstate"
|
||||
CACHE_DOMAIN: ci-caches.rust-lang.org
|
||||
@ -172,6 +174,7 @@ jobs:
|
||||
CI_JOB_NAME: "${{ matrix.name }}"
|
||||
CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
|
||||
HEAD_SHA: "${{ github.event.pull_request.head.sha || github.sha }}"
|
||||
DOCKER_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
|
||||
SCCACHE_BUCKET: rust-lang-ci-sccache2
|
||||
DEPLOY_BUCKET: rust-lang-ci2
|
||||
TOOLSTATE_REPO: "https://github.com/rust-lang-nursery/rust-toolstate"
|
||||
@ -554,6 +557,7 @@ jobs:
|
||||
CI_JOB_NAME: "${{ matrix.name }}"
|
||||
CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
|
||||
HEAD_SHA: "${{ github.event.pull_request.head.sha || github.sha }}"
|
||||
DOCKER_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
|
||||
SCCACHE_BUCKET: rust-lang-ci-sccache2
|
||||
DEPLOY_BUCKET: rust-lang-ci2
|
||||
TOOLSTATE_REPO: "https://github.com/rust-lang-nursery/rust-toolstate"
|
||||
|
@ -74,25 +74,6 @@ if [ -f "$docker_dir/$image/Dockerfile" ]; then
|
||||
|
||||
cksum=$(sha512sum $hash_key | \
|
||||
awk '{print $1}')
|
||||
|
||||
url="https://$CACHE_DOMAIN/docker/$cksum"
|
||||
|
||||
echo "Attempting to download $url"
|
||||
rm -f /tmp/rustci_docker_cache
|
||||
set +e
|
||||
retry curl --max-time 600 -y 30 -Y 10 --connect-timeout 30 -f -L -C - \
|
||||
-o /tmp/rustci_docker_cache "$url"
|
||||
|
||||
docker_archive_hash=$(sha512sum /tmp/rustci_docker_cache | awk '{print $1}')
|
||||
echo "Downloaded archive hash: ${docker_archive_hash}"
|
||||
|
||||
echo "Loading images into docker"
|
||||
# docker load sometimes hangs in the CI, so time out after 10 minutes with TERM,
|
||||
# KILL after 12 minutes
|
||||
loaded_images=$(/usr/bin/timeout -k 720 600 docker load -i /tmp/rustci_docker_cache \
|
||||
| sed 's/.* sha/sha/')
|
||||
set -e
|
||||
printf "Downloaded containers:\n$loaded_images\n"
|
||||
fi
|
||||
|
||||
dockerfile="$docker_dir/$image/Dockerfile"
|
||||
@ -103,46 +84,65 @@ if [ -f "$docker_dir/$image/Dockerfile" ]; then
|
||||
context="$script_dir"
|
||||
fi
|
||||
echo "::group::Building docker image for $image"
|
||||
echo "Image input"
|
||||
cat $hash_key
|
||||
echo "Image input checksum ${cksum}"
|
||||
|
||||
# As of August 2023, Github Actions have updated Docker to 23.X,
|
||||
# which uses the BuildKit by default. It currently throws aways all
|
||||
# intermediate layers, which breaks our usage of S3 layer caching.
|
||||
# Therefore we opt-in to the old build backend for now.
|
||||
export DOCKER_BUILDKIT=0
|
||||
retry docker \
|
||||
build \
|
||||
--rm \
|
||||
-t rust-ci \
|
||||
-f "$dockerfile" \
|
||||
"$context"
|
||||
echo "::endgroup::"
|
||||
# Print docker version
|
||||
docker --version
|
||||
|
||||
if [ "$CI" != "" ]; then
|
||||
s3url="s3://$SCCACHE_BUCKET/docker/$cksum"
|
||||
upload="aws s3 cp - $s3url"
|
||||
digest=$(docker inspect rust-ci --format '{{.Id}}')
|
||||
echo "Built container $digest"
|
||||
if ! grep -q "$digest" <(echo "$loaded_images"); then
|
||||
echo "Uploading finished image $digest to $url"
|
||||
set +e
|
||||
# Print image history for easier debugging of layer SHAs
|
||||
docker history rust-ci
|
||||
docker history -q rust-ci | \
|
||||
grep -v missing | \
|
||||
xargs docker save | \
|
||||
gzip | \
|
||||
$upload
|
||||
set -e
|
||||
else
|
||||
echo "Looks like docker image is the same as before, not uploading"
|
||||
fi
|
||||
# Record the container image for reuse, e.g. by rustup.rs builds
|
||||
info="$dist/image-$image.txt"
|
||||
mkdir -p "$dist"
|
||||
echo "$url" >"$info"
|
||||
echo "$digest" >>"$info"
|
||||
cat "$info"
|
||||
# On non-CI or PR jobs, we don't have permissions to write to the registry cache, so we should
|
||||
# not use `docker login` nor caching.
|
||||
if [[ "$CI" == "" ]] || [[ "$PR_CI_JOB" == "1" ]];
|
||||
then
|
||||
retry docker build --rm -t rust-ci -f "$dockerfile" "$context"
|
||||
else
|
||||
REGISTRY=ghcr.io
|
||||
# Most probably rust-lang-ci, but in general the owner of the repository where CI runs
|
||||
REGISTRY_USERNAME=${GITHUB_REPOSITORY_OWNER}
|
||||
# Tag used to push the final Docker image, so that it can be pulled by e.g. rustup
|
||||
IMAGE_TAG=${REGISTRY}/${REGISTRY_USERNAME}/rust-ci:${cksum}
|
||||
# Tag used to cache the Docker build
|
||||
# It seems that it cannot be the same as $IMAGE_TAG, otherwise it overwrites the cache
|
||||
CACHE_IMAGE_TAG=${REGISTRY}/${REGISTRY_USERNAME}/rust-ci-cache:${cksum}
|
||||
|
||||
# Log into the Docker registry, so that we can read/write cache and the final image
|
||||
echo ${DOCKER_TOKEN} | docker login ${REGISTRY} \
|
||||
--username ${REGISTRY_USERNAME} \
|
||||
--password-stdin
|
||||
|
||||
# Enable a new Docker driver so that --cache-from/to works with a registry backend
|
||||
docker buildx create --use --driver docker-container
|
||||
|
||||
# Build the image using registry caching backend
|
||||
retry docker \
|
||||
buildx \
|
||||
build \
|
||||
--rm \
|
||||
-t rust-ci \
|
||||
-f "$dockerfile" \
|
||||
--cache-from type=registry,ref=${CACHE_IMAGE_TAG} \
|
||||
--cache-to type=registry,ref=${CACHE_IMAGE_TAG},compression=zstd \
|
||||
--output=type=docker \
|
||||
"$context"
|
||||
|
||||
# Print images for debugging purposes
|
||||
docker images
|
||||
|
||||
# Tag the built image and push it to the registry
|
||||
docker tag rust-ci "${IMAGE_TAG}"
|
||||
docker push "${IMAGE_TAG}"
|
||||
|
||||
# Record the container registry tag/url for reuse, e.g. by rustup.rs builds
|
||||
# It should be possible to run `docker pull <$IMAGE_TAG>` to download the image
|
||||
info="$dist/image-$image.txt"
|
||||
mkdir -p "$dist"
|
||||
echo "${IMAGE_TAG}" > "$info"
|
||||
cat "$info"
|
||||
|
||||
echo "To download the image, run docker pull ${IMAGE_TAG}"
|
||||
fi
|
||||
echo "::endgroup::"
|
||||
elif [ -f "$docker_dir/disabled/$image/Dockerfile" ]; then
|
||||
if isCI; then
|
||||
echo Cannot run disabled images on CI!
|
||||
|
@ -34,6 +34,7 @@ x--expand-yaml-anchors--remove:
|
||||
CARGO_REGISTRIES_CRATES_IO_PROTOCOL: sparse
|
||||
# commit of PR sha or commit sha. `GITHUB_SHA` is not accurate for PRs.
|
||||
HEAD_SHA: ${{ github.event.pull_request.head.sha || github.sha }}
|
||||
DOCKER_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- &public-variables
|
||||
SCCACHE_BUCKET: rust-lang-ci-sccache2
|
||||
@ -301,6 +302,7 @@ on:
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
|
||||
defaults:
|
||||
run:
|
||||
|
Loading…
Reference in New Issue
Block a user