[PATCH] s3-mirror: sync s3-sync-path script with ideas from s3.sh

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



1. sync everything except for repomd.xml
2. then sync repomd.xml files only, and invalidate caches
3. wait a bit to give in-progress downloads a chance to finish
4. delete outdated RPMs and metadata, which should no longer be needed

Also make the sleep/cache configurable.
---
 roles/s3-mirror/files/s3-sync-path.sh | 99 ++++++++++++++-------------
 roles/s3-mirror/files/s3.sh           | 19 +++--
 2 files changed, 65 insertions(+), 53 deletions(-)

diff --git a/roles/s3-mirror/files/s3-sync-path.sh b/roles/s3-mirror/files/s3-sync-path.sh
index 79b4d63eb..5a414e3ad 100644
--- a/roles/s3-mirror/files/s3-sync-path.sh
+++ b/roles/s3-mirror/files/s3-sync-path.sh
@@ -9,58 +9,65 @@ if [[ "$1" == "" ]] || [[ $1 != /pub* ]] || [[ $1 != */ ]]; then
   exit 1
 fi
 
+aws_sync=( aws s3 sync --no-follow-symlinks )
+
 # first run do not delete anything or copy the repodata.
-CMD1="aws s3 sync                   \
-  --exclude */repodata/*         \
-  --exclude *.snapshot/*          \
-  --exclude *source/*             \
-  --exclude *SRPMS/*              \
-  --exclude *debug/*              \
-  --exclude *beta/*               \
-  --exclude *ppc/*                \
-  --exclude *ppc64/*              \
-  --exclude *repoview/*           \
-  --exclude *Fedora/*             \
-  --exclude *EFI/*                \
-  --exclude *core/*               \
-  --exclude *extras/*             \
-  --exclude *LiveOS/*             \
-  --exclude *development/rawhide/* \
-  --no-follow-symlinks            \
-  --only-show-errors              \
-  "
-  #--dryrun                         \
+excludes=(
+  # Do not exclude "*/repodata/*": it would override CMD2's repomd.xml --include.
+  --exclude "*.snapshot/*"
+  --exclude "*source/*"
+  --exclude "*SRPMS/*"
+  --exclude "*debug/*"
+  --exclude "*beta/*"
+  --exclude "*ppc/*"
+  --exclude "*ppc64/*"
+  --exclude "*repoview/*"
+  --exclude "*Fedora/*"
+  --exclude "*EFI/*"
+  --exclude "*core/*"
+  --exclude "*extras/*"
+  --exclude "*LiveOS/*"
+  --exclude "*development/rawhide/*"
+  --only-show-errors
+)
 
-# second we delete old content and also copy the repodata
-CMD2="aws s3 sync                   \
-  --delete                         \
-  --exclude *.snapshot/*          \
-  --exclude *source/*             \
-  --exclude *SRPMS/*              \
-  --exclude *debug/*              \
-  --exclude *beta/*               \
-  --exclude *ppc/*                \
-  --exclude *ppc64/*              \
-  --exclude *repoview/*           \
-  --exclude *Fedora/*             \
-  --exclude *EFI/*                \
-  --exclude *core/*               \
-  --exclude *extras/*             \
-  --exclude *LiveOS/*             \
-  --exclude *development/rawhide/* \
-  --no-follow-symlinks            \
-  --only-show-errors              \
-  "
-  #--dryrun                         \
+S3_MIRROR=s3-mirror-us-west-1-02.fedoraproject.org
+DIST_ID=E2KJMDC0QAJDMU
+MAX_CACHE_SEC=60
+DNF_GENTLY_TIMEOUT=120
+
+# First run this command that syncs, but does not delete.
+# It also excludes repomd.xml.
+CMD1=( "${aws_sync[@]}" "${excludes[@]}" --exclude "*/repomd.xml" )
+
+# Next we run this command which syncs repomd.xml files.  Include must precede
+# the large set of excludes.  Make sure that the 'max-age' isn't too large so
+# we know that we can start removing old data ASAP.
+CMD2=( "${aws_sync[@]}" --exclude "*" --include "*/repomd.xml" "${excludes[@]}"
+                        --cache-control "max-age=$MAX_CACHE_SEC" )
+
+# Then we delete old RPMs and old metadata (but after invalidating caches).
+CMD3=( "${aws_sync[@]}" "${excludes[@]}" --delete )
 
 #echo "$CMD /srv$1 s3://s3-mirror-us-west-1-02.fedoraproject.org$1"
 echo "Starting $1 sync at $(date)" >> /var/log/s3-mirror/timestamps
-$CMD1 /srv$1 s3://s3-mirror-us-west-1-02.fedoraproject.org$1
-$CMD1 /srv$1/repodata/ s3://s3-mirror-us-west-1-02.fedoraproject.org$1/repodata/
+"${CMD1[@]}" "/srv$1" "s3://$S3_MIRROR$1"
+"${CMD2[@]}" "/srv$1" "s3://$S3_MIRROR$1"
+
 # Always do the invalidations because they are quick and prevent issues
 # depending on which path is synced.
-for file in $(echo $1/repodata/* ); do
-  aws cloudfront create-invalidation --distribution-id E2KJMDC0QAJDMU --paths "$file" > /dev/null
+for file in "${1}repodata/repomd.xml"; do  # $1 already ends with "/" (checked above)
+  aws cloudfront create-invalidation --distribution-id "$DIST_ID" --paths "$file" > /dev/null
 done
-$CMD2 /srv$1 s3://s3-mirror-us-west-1-02.fedoraproject.org$1
+
+SLEEP=$(( MAX_CACHE_SEC + DNF_GENTLY_TIMEOUT ))
+echo "Ready $1 sync, giving dnf downloads ${SLEEP}s before delete, at $(date)" >> /var/log/s3-mirror/timestamps
+
+# Some DNF processes may have started downloading metadata before we invalidated
+# caches, and so started with an outdated repomd.xml file.  Give them a bit more
+# time so they have a chance to download the rest of the metadata and RPMs.
+sleep $SLEEP
+
+"${CMD3[@]}" "/srv$1" "s3://$S3_MIRROR$1"
+
 echo "Ending $1 sync at $(date)" >> /var/log/s3-mirror/timestamps
diff --git a/roles/s3-mirror/files/s3.sh b/roles/s3-mirror/files/s3.sh
index c157b0cdb..df58ac153 100644
--- a/roles/s3-mirror/files/s3.sh
+++ b/roles/s3-mirror/files/s3.sh
@@ -88,6 +88,11 @@ excludes=(
   --exclude "*/updates/testing/29/*"
 )
 
+S3_MIRROR=s3-mirror-us-west-1-02.fedoraproject.org
+DIST_ID=E2KJMDC0QAJDMU
+MAX_CACHE_SEC=60
+DNF_GENTLY_TIMEOUT=120
+
 # First run this command that syncs, but does not delete.
 # It also excludes repomd.xml.
 CMD1=( "${aws_sync[@]}" "${excludes[@]}" --exclude "*/repomd.xml" )
@@ -95,14 +100,12 @@ CMD1=( "${aws_sync[@]}" "${excludes[@]}" --exclude "*/repomd.xml" )
 # Next we run this command which syncs repomd.xml files.  Include must precede
 # the large set of excludes.  Make sure that the 'max-age' isn't too large so
 # we know that we can start removing old data ASAP.
-CMD2=( "${aws_sync[@]}" --exclude "*" --include "*/repomd.xml" "${excludes[@]}" --cache-control max-age=300 )
+CMD2=( "${aws_sync[@]}" --exclude "*" --include "*/repomd.xml" "${excludes[@]}"
+                        --cache-control "max-age=$MAX_CACHE_SEC" )
 
 # Then we delete old RPMs and old metadata (but after invalidating caches).
 CMD3=( "${aws_sync[@]}" "${excludes[@]}" --delete )
 
-S3_MIRROR=s3-mirror-us-west-1-02.fedoraproject.org
-DIST_ID=E2KJMDC0QAJDMU
-
 # Sync EPEL
 #echo $CMD /srv/pub/epel/ s3://$S3_MIRROR/pub/epel/
 echo "Starting EPEL sync at $(date)" >> /var/log/s3-mirror/timestamps
@@ -132,10 +135,12 @@ for file in $(echo /srv/pub/fedora/linux/updates/*/*/*/repodata/repomd.xml | sed
   aws cloudfront create-invalidation --distribution-id "$DIST_ID" --paths "$file"
 done
 
+SLEEP=$(( MAX_CACHE_SEC + DNF_GENTLY_TIMEOUT ))
+
 # Consider some DNF processes started downloading metadata before we invalidated
-# caches, and started with outdated repomd.xml file.  Give it 10 minutes so they
-# have chance to download the rest of metadata and RPMs.
-sleep 600
+# caches, and started with outdated repomd.xml file.  Give them a bit more time
+# so they have a chance to download the rest of the metadata and RPMs.
+sleep $SLEEP
 
 "${CMD3[@]}" /srv/pub/epel/ "s3://$S3_MIRROR/pub/epel/"
 "${CMD3[@]}" /srv/pub/fedora/ s3://$S3_MIRROR/pub/fedora/
-- 
2.25.1
_______________________________________________
infrastructure mailing list -- infrastructure@xxxxxxxxxxxxxxxxxxxxxxx
To unsubscribe send an email to infrastructure-leave@xxxxxxxxxxxxxxxxxxxxxxx
Fedora Code of Conduct: https://docs.fedoraproject.org/en-US/project/code-of-conduct/
List Guidelines: https://fedoraproject.org/wiki/Mailing_list_guidelines
List Archives: https://lists.fedoraproject.org/archives/list/infrastructure@xxxxxxxxxxxxxxxxxxxxxxx




[Index of Archives]     [Fedora Development]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Yosemite News]     [KDE Users]

  Powered by Linux