The main problem here is that the 'archive' filterlist is still pretty big, over 20MiB. I don't want to push the fedfind changes out to the public until it's smaller. With this change the imagelist files are all tiny, under 250KiB. PR #26 is an alternative to this which just extends the exclusion list of the previous approach, but nirik says he prefers this approach.

From 5b7169688a1732f49ff0bb6320fa34e641b363de Mon Sep 17 00:00:00 2001
From: Adam Williamson <awilliam@xxxxxxxxxx>
Date: Mon, 21 Nov 2016 19:36:49 -0800
Subject: [PATCH] turn 'filterlist' into 'imagelist', using productmd

This adopts https://pagure.io/quick-fedora-mirror/pull-request/27 and adapts to it, so we get `imagelist` files rather than `filterlist` files (see recent commits for this). The rationale is more fully explained in that PR (and in PR #26 also) - on further inspection it turns out that we have to filter out an awful lot of extensions to create small filterlists for all three modules, and I'm worried that other file extensions may appear in the future and cause the filterlists to suddenly get bigger again. Instead, we have `create-filelist` use the productmd constant that defines valid image formats, and only include files that match those formats in the list. The downside of this approach is we have to ensure productmd on all the systems that run `create-filelist` is kept up to date if the list of valid image formats changes.
--- files/scripts/create-filelist | 15 ++++++++------- files/scripts/update-fullfiletimelist | 26 +++++++++++++------------- playbooks/groups/secondary.yml | 1 + roles/bodhi2/backend/tasks/main.yml | 1 + roles/releng/tasks/main.yml | 1 + 5 files changed, 24 insertions(+), 20 deletions(-) diff --git a/files/scripts/create-filelist b/files/scripts/create-filelist index e485efb..0beaced 100755 --- a/files/scripts/create-filelist +++ b/files/scripts/create-filelist @@ -12,6 +12,7 @@ import argparse import hashlib import os import sys +from productmd.images import SUPPORTED_IMAGE_FORMATS from scandir import scandir @@ -58,8 +59,8 @@ def parseopts(): null = open(os.devnull, 'w') p = argparse.ArgumentParser( description='Generate a list of files and times, suitable for consumption by quick-fedora-mirror, ' - 'and a much smaller list with packages, Device Tree boot files, HTML files, pictures ' - 'and directories filtered out, for consumption by fedfind.') + 'and a much smaller list of only files that match one of the productmd supported ' + 'image types, for use by fedfind.') p.add_argument('-c', '--checksum', action='store_true', help='Include checksums of all repomd.xml files in the file list.') p.add_argument('-C', '--checksum-file', action='append', dest='checksum_files', @@ -75,8 +76,8 @@ def parseopts(): help='Filename of the file list with times (default: stdout).') p.add_argument('-f', '--filelist', type=argparse.FileType('w'), default=null, help='Filename of the file list without times (default: no plain file list is generated).') - p.add_argument('-F', '--filterlist', type=argparse.FileType('w'), default=null, - help='Filename of the filtered file list for fedfind (default: not generated).') + p.add_argument('-i', '--imagelist', type=argparse.FileType('w'), default=null, + help='Filename of the image file list for fedfind (default: not generated).') opts = p.parse_args() @@ -112,9 +113,9 @@ def main(): # opts.filelist.write(entry.path + '\n') print(entry.path, 
file=opts.filelist) # write to filtered list if appropriate - skips = ('.rpm', '.drpm', '.dtb', '.html', '.png', '.jpg') - if not any(entry.path.endswith(skip) for skip in skips) and not (entry.is_dir()): - print(entry.path, file=opts.filterlist) + imgs = ['.{0}'.format(form) for form in SUPPORTED_IMAGE_FORMATS] + if any(entry.path.endswith(img) for img in imgs): + print(entry.path, file=opts.imagelist) if entry.name in opts.checksum_files: checksums[entry.path[2:]] = True info = entry.stat(follow_symlinks=False) diff --git a/files/scripts/update-fullfiletimelist b/files/scripts/update-fullfiletimelist index f6c225a..c0439da 100755 --- a/files/scripts/update-fullfiletimelist +++ b/files/scripts/update-fullfiletimelist @@ -25,7 +25,7 @@ CREATE=/usr/local/bin/create-filelist # context. FILELIST=fullfilelist TIMELIST='fullfiletimelist-$mod' -FILTERLIST='filterlist-$mod' +IMAGELIST='imagelist-$mod' usage () { echo @@ -108,12 +108,12 @@ cd $tmpd for mod in $MODS; do currentfl=$TOPD/$mod/${FILELIST/'$mod'/$mod} currenttl=$TOPD/$mod/${TIMELIST/'$mod'/$mod} - currentsl=$TOPD/$mod/${FILTERLIST/'$mod'/$mod} + currentil=$TOPD/$mod/${IMAGELIST/'$mod'/$mod} flname=$(basename $currentfl) tlname=$(basename $currenttl) - slname=$(basename $currentsl) + ilname=$(basename $currentil) - $CREATE -c -s -d $TOPD/$mod -f $flname -t $tlname -F $slname + $CREATE -c -s -d $TOPD/$mod -f $flname -t $tlname -i $ilname # If a file list exists and doesn't differ from what we just generated, # delete the latter. 
@@ -123,8 +123,8 @@ cd $tmpd if [[ -f $currenttl ]] && diff -q $currenttl $tlname > /dev/null; then rm -f $tlname fi - if [[ -f $currentsl ]] && diff -q $currentsl $slname > /dev/null; then - rm -f $slname + if [[ -f $currentil ]] && diff -q $currentil $ilname > /dev/null; then + rm -f $ilname fi done @@ -134,13 +134,13 @@ cd $tmpd for mod in $MODS; do currentfl=$TOPD/$mod/${FILELIST/'$mod'/$mod} currenttl=$TOPD/$mod/${TIMELIST/'$mod'/$mod} - currentsl=$TOPD/$mod/${FILTERLIST/'$mod'/$mod} + currentil=$TOPD/$mod/${IMAGELIST/'$mod'/$mod} flname=$(basename $currentfl) fldir=$(dirname $currentfl) tlname=$(basename $currenttl) tldir=$(dirname $currenttl) - slname=$(basename $currentsl) - sldir=$(dirname $currentsl) + ilname=$(basename $currentil) + ildir=$(dirname $currentil) if [[ -f $flname ]]; then tmpf=$(mktemp -p $fldir $flname.XXXXXXXXXX) @@ -154,11 +154,11 @@ cd $tmpd chmod 644 $tmpf mv $tmpf $currenttl fi - if [[ -f $slname ]]; then - tmpf=$(mktemp -p $sldir $slname.XXXXXXXXXX) - cp -p $slname $tmpf + if [[ -f $ilname ]]; then + tmpf=$(mktemp -p $ildir $ilname.XXXXXXXXXX) + cp -p $ilname $tmpf chmod 644 $tmpf - mv $tmpf $currentsl + mv $tmpf $currentil fi done diff --git a/playbooks/groups/secondary.yml b/playbooks/groups/secondary.yml index e4a49a8..1d6f545 100644 --- a/playbooks/groups/secondary.yml +++ b/playbooks/groups/secondary.yml @@ -58,6 +58,7 @@ - createrepo - koji - python-scandir + - python2-productmd - name: add create-filelist script from quick-fedora-mirror copy: src="{{ files }}/scripts/create-filelist" dest=/usr/local/bin/create-filelist mode=0755 diff --git a/roles/bodhi2/backend/tasks/main.yml b/roles/bodhi2/backend/tasks/main.yml index 08105b5..674b03a 100644 --- a/roles/bodhi2/backend/tasks/main.yml +++ b/roles/bodhi2/backend/tasks/main.yml @@ -24,6 +24,7 @@ - sigul - python-alembic - python-scandir + - python2-productmd tags: - packages - bodhi diff --git a/roles/releng/tasks/main.yml b/roles/releng/tasks/main.yml index d02fbea..f85ebd2 
100644 --- a/roles/releng/tasks/main.yml +++ b/roles/releng/tasks/main.yml @@ -117,6 +117,7 @@ - pungi - compose-utils - python-scandir + - python2-productmd when: ansible_distribution_major_version|int > 21 # add sigul to secondary arch compose boxes -- 2.10.2 -- Adam Williamson Fedora QA Community Monkey IRC: adamw | Twitter: AdamW_Fedora | identi.ca: adamwfedora http://www.happyassassin.net _______________________________________________ infrastructure mailing list -- infrastructure@xxxxxxxxxxxxxxxxxxxxxxx To unsubscribe send an email to infrastructure-leave@xxxxxxxxxxxxxxxxxxxxxxx