1. sync everything except for repomd.xml 2. then sync repomd.xml files only, and invalidate caches 3. gently wait a bit to give current downloads a chance to finish 4. delete outdated RPMs and metadata, which shouldn't be needed anymore by then. Also make the sleep time and the cache max-age configurable. --- roles/s3-mirror/files/s3-sync-path.sh | 99 ++++++++++++++------------- roles/s3-mirror/files/s3.sh | 19 +++-- 2 files changed, 65 insertions(+), 53 deletions(-) diff --git a/roles/s3-mirror/files/s3-sync-path.sh b/roles/s3-mirror/files/s3-sync-path.sh index 79b4d63eb..5a414e3ad 100644 --- a/roles/s3-mirror/files/s3-sync-path.sh +++ b/roles/s3-mirror/files/s3-sync-path.sh @@ -9,58 +9,65 @@ if [[ "$1" == "" ]] || [[ $1 != /pub* ]] || [[ $1 != */ ]]; then exit 1 fi +aws_sync=( aws s3 sync --no-follow-symlinks ) + # first run do not delete anything or copy the repodata. -CMD1="aws s3 sync \ - --exclude */repodata/* \ - --exclude *.snapshot/* \ - --exclude *source/* \ - --exclude *SRPMS/* \ - --exclude *debug/* \ - --exclude *beta/* \ - --exclude *ppc/* \ - --exclude *ppc64/* \ - --exclude *repoview/* \ - --exclude *Fedora/* \ - --exclude *EFI/* \ - --exclude *core/* \ - --exclude *extras/* \ - --exclude *LiveOS/* \ - --exclude *development/rawhide/* \ - --no-follow-symlinks \ - --only-show-errors \ - " - #--dryrun \ +exclude=( + --exclude "*/repodata/*" + --exclude "*.snapshot/*" + --exclude "*source/*" + --exclude "*SRPMS/*" + --exclude "*debug/*" + --exclude "*beta/*" + --exclude "*ppc/*" + --exclude "*ppc64/*" + --exclude "*repoview/*" + --exclude "*Fedora/*" + --exclude "*EFI/*" + --exclude "*core/*" + --exclude "*extras/*" + --exclude "*LiveOS/*" + --exclude "*development/rawhide/*" + --only-show-errors +) -# second we delete old content and also copy the repodata -CMD2="aws s3 sync \ - --delete \ - --exclude *.snapshot/* \ - --exclude *source/* \ - --exclude *SRPMS/* \ - --exclude *debug/* \ - --exclude *beta/* \ - --exclude *ppc/* \ - --exclude *ppc64/* \ - --exclude *repoview/* \ - --exclude *Fedora/* \ - 
--exclude *EFI/* \ - --exclude *core/* \ - --exclude *extras/* \ - --exclude *LiveOS/* \ - --exclude *development/rawhide/* \ - --no-follow-symlinks \ - --only-show-errors \ - " - #--dryrun \ +S3_MIRROR=s3-mirror-us-west-1-02.fedoraproject.org +DIST_ID=E2KJMDC0QAJDMU +MAX_CACHE_SEC=60 +DNF_GENTLY_TIMEOUT=120 + +# First run this command that syncs, but does not delete. +# It also excludes repomd.xml. +CMD1=( "${aws_sync[@]}" "${excludes[@]}" --exclude "*/repomd.xml" ) + +# Next we run this command which syncs repomd.xml files. Include must precede +# the large set of excludes. Make sure that the 'max-age' isn't too large so +# we know that we can start removing old data ASAP. +CMD2=( "${aws_sync[@]}" --exclude "*" --include "*/repomd.xml" "${excludes[@]}" + --cache-control "max-age=$MAX_CACHE_SEC" ) + +# Then we delete old RPMs and old metadata (but after invalidating caches). +CMD3=( "${aws_sync[@]}" "${excludes[@]}" --delete ) #echo "$CMD /srv$1 s3://s3-mirror-us-west-1-02.fedoraproject.org$1" echo "Starting $1 sync at $(date)" >> /var/log/s3-mirror/timestamps -$CMD1 /srv$1 s3://s3-mirror-us-west-1-02.fedoraproject.org$1 -$CMD1 /srv$1/repodata/ s3://s3-mirror-us-west-1-02.fedoraproject.org$1/repodata/ +"${CMD1[@]}" "/srv$1" "s3://$S3_MIRROR$1" +"${CMD2[@]}" "/srv$1" "s3://$S3_MIRROR$1" + # Always do the invalidations because they are quick and prevent issues # depending on which path is synced. 
-for file in $(echo $1/repodata/* ); do - aws cloudfront create-invalidation --distribution-id E2KJMDC0QAJDMU --paths "$file" > /dev/null +for file in $(echo $1/repodata/repomd.xml ); do + aws cloudfront create-invalidation --distribution-id $DIST_ID --paths "$file" > /dev/null done -$CMD2 /srv$1 s3://s3-mirror-us-west-1-02.fedoraproject.org$1 + +SLEEP=$(( MAX_CACHE_SEC + DNF_GENTLY_TIMEOUT )) +echo "Ready $1 sync, giving dnf downloads ${SLEEP}s before delete, at $(date)" >> /var/log/s3-mirror/timestamps + +# Consider some DNF processes started downloading metadata before we invalidated +# caches, and started with outdated repomd.xml file. Give it few more seconds +# so they have chance to download the rest of metadata and RPMs. +sleep $SLEEP + +"${CMD3[@]}" "/srv$1" "s3://$S3_MIRROR$1" + echo "Ending $1 sync at $(date)" >> /var/log/s3-mirror/timestamps diff --git a/roles/s3-mirror/files/s3.sh b/roles/s3-mirror/files/s3.sh index c157b0cdb..df58ac153 100644 --- a/roles/s3-mirror/files/s3.sh +++ b/roles/s3-mirror/files/s3.sh @@ -88,6 +88,11 @@ excludes=( --exclude "*/updates/testing/29/*" ) +S3_MIRROR=s3-mirror-us-west-1-02.fedoraproject.org +DIST_ID=E2KJMDC0QAJDMU +MAX_CACHE_SEC=60 +DNF_GENTLY_TIMEOUT=120 + # First run this command that syncs, but does not delete. # It also excludes repomd.xml. CMD1=( "${aws_sync[@]}" "${excludes[@]}" --exclude "*/repomd.xml" ) @@ -95,14 +100,12 @@ CMD1=( "${aws_sync[@]}" "${excludes[@]}" --exclude "*/repomd.xml" ) # Next we run this command which syncs repomd.xml files. Include must precede # the large set of excludes. Make sure that the 'max-age' isn't too large so # we know that we can start removing old data ASAP. -CMD2=( "${aws_sync[@]}" --exclude "*" --include "*/repomd.xml" "${excludes[@]}" --cache-control max-age=300 ) +CMD2=( "${aws_sync[@]}" --exclude "*" --include "*/repomd.xml" "${excludes[@]}" + --cache-control "max-age=$MAX_CACHE_SEC" ) # Then we delete old RPMs and old metadata (but after invalidating caches). 
CMD3=( "${aws_sync[@]}" "${excludes[@]}" --delete ) -S3_MIRROR=s3-mirror-us-west-1-02.fedoraproject.org -DIST_ID=E2KJMDC0QAJDMU - # Sync EPEL #echo $CMD /srv/pub/epel/ s3://$S3_MIRROR/pub/epel/ echo "Starting EPEL sync at $(date)" >> /var/log/s3-mirror/timestamps @@ -132,10 +135,12 @@ for file in $(echo /srv/pub/fedora/linux/updates/*/*/*/repodata/repomd.xml | sed aws cloudfront create-invalidation --distribution-id "$DIST_ID" --paths "$file" done +SLEEP=$(( MAX_CACHE_SEC + DNF_GENTLY_TIMEOUT )) + # Consider some DNF processes started downloading metadata before we invalidated -# caches, and started with outdated repomd.xml file. Give it 10 minutes so they -# have chance to download the rest of metadata and RPMs. -sleep 600 +# caches, and started with outdated repomd.xml file. Give it few more seconds +# so they have chance to download the rest of metadata and RPMs. +sleep $SLEEP "${CMD3[@]}" /srv/pub/epel/ "s3://$S3_MIRROR/pub/epel/" "${CMD3[@]}" /srv/pub/fedora/ s3://$S3_MIRROR/pub/fedora/ -- 2.25.1 _______________________________________________ infrastructure mailing list -- infrastructure@xxxxxxxxxxxxxxxxxxxxxxx To unsubscribe send an email to infrastructure-leave@xxxxxxxxxxxxxxxxxxxxxxx Fedora Code of Conduct: https://docs.fedoraproject.org/en-US/project/code-of-conduct/ List Guidelines: https://fedoraproject.org/wiki/Mailing_list_guidelines List Archives: https://lists.fedoraproject.org/archives/list/infrastructure@xxxxxxxxxxxxxxxxxxxxxxx