On Wed, Sep 15, 2010 at 05:24:11PM +0100, Gavin McCullagh wrote: > Hi, > > On Wed, 15 Sep 2010, Nik Conwell wrote: > > > Isn't the easy hack for dedup just looking at the above md5 files and > > then doing appropriate hard links? This could be done by a nightly > > trawl of the spool space. A bigger win would be to separate the headers > > from the messages but that's a lot more work. > > For what it's worth, I believe the fsdup tool which is part of fslint will > do this for you. > > http://www.pixelbeat.org/fslint/ Or this lovely little toy. It uses the fact that in current versions of Cyrus the "GUID" field is actually the sha1 of the underlying file. Bron ( warning: may contain FastMail specific assumptions )
#!/usr/bin/perl -w
#
# Deduplicate Cyrus spool files by hard-linking messages that share a GUID.
#
# Usage: <script> [slot-name]
#
# For every IMAP slot reported by ME::Machine (or only the slot whose name
# matches the optional first argument), each user's cyrus.index files are
# read and the spool files of records sharing a MessageGuid are replaced by
# hard links to a single copy.
#
# NOTE(review): an identical MessageGuid is treated as proof of identical
# file content -- confirm this holds for the Cyrus version in use.

# SETUP {{{

use strict;
use warnings;

BEGIN { do "/home/mod_perl/hm/ME/FindLibs.pm"; }

use Date::Manip;
use MailApp::Admin::Actions;
use IO::File;
use ME::Machine;
use Cyrus::HeaderFile;
use Data::Dumper;
use Cyrus::IndexFile;
use Getopt::Std;
use Digest::SHA1;
use ME::CyrusBackup;
use ME::User;
use Data::Dumper;

# }}}

# Optional slot name; when given, every other slot is skipped.
my $sn = shift;

# Resolve the cyrus account up front.  Without this check a failed lookup
# leaves $uid/$gid undefined, which chown() would numify to 0 (root).
my (undef, undef, $uid, $gid) = getpwnam('cyrus');
die "Cannot look up user 'cyrus'\n"
    unless defined $uid and defined $gid;

foreach my $Slot (ME::Machine->ImapSlots()) {
    next if ($sn and $sn ne $Slot->Name());

    my $users = $Slot->AllMailboxes();
    my $conf  = $Slot->ImapdConf();

    foreach my $user (sort keys %$users) {
        process($conf, $user, $users->{$user});
    }
}

# process($conf, $user, $folders)
#
# Hard-link together all spool files of one user that share a GUID.
#
#   $conf    - object providing GetUserLocation() (from $Slot->ImapdConf())
#   $user    - user name, printed as a progress line
#   $folders - array ref of that user's folder names
#
# Dies if a cyrus.index file cannot be opened.  Per-file problems are
# printed to STDOUT and that file is skipped.
sub process {
    my ($conf, $user, $folders) = @_;

    print "$user\n";

    my $ihave = _collect_guids($conf, $user, $folders);

    foreach my $guid (keys %$ihave) {
        my $items = $ihave->{$guid};
        next if @$items <= 1;

        my ($srcname, @others) = _find_unlinked_copies($conf, $user, $items);
        next unless @others;

        print "fixing up files for $guid ($srcname)\n";
        _replace_with_link($srcname, $_) for @others;
    }

    return;
}

# Read every folder's cyrus.index and return a hash ref mapping
# MessageGuid => [ [folder, uid], ... ].
sub _collect_guids {
    my ($conf, $user, $folders) = @_;

    my %ihave;
    foreach my $folder (@$folders) {
        my $meta = $conf->GetUserLocation('meta', $user, 'default', $folder);
        my $index = Cyrus::IndexFile->new_file("$meta/cyrus.index")
            || die "Failed to open $meta/cyrus.index";

        while (my $record = $index->next_record()) {
            my $guid = $record->{MessageGuid};

            # NOTE(review): assumes an empty / all-zero GUID means "not
            # computed" -- confirm against the index format.  Grouping such
            # records would link unrelated messages together, so skip them.
            next if !defined $guid or $guid =~ /\A[0\x00]*\z/;

            push @{ $ihave{$guid} }, [$folder, $record->{Uid}];
        }
    }

    return \%ihave;
}

# Given the [folder, uid] pairs recorded for one GUID, return
# ($srcname, @others): the first spool file that exists, followed by every
# other existing file on the same device that is not already the same
# inode.  Returns an empty list when no file exists at all.
sub _find_unlinked_copies {
    my ($conf, $user, $items) = @_;

    my ($dev, $inode, $srcname);
    my @others;

    foreach my $item (@$items) {
        my $spool = $conf->GetUserLocation('spool', $user, 'default', $item->[0]);
        $spool =~ s{/$}{};
        my $file = "$spool/$item->[1].";

        my @sd = stat($file);
        unless (@sd) {
            # An index record without a spool file is simply not a
            # candidate; anything other than "not found" is worth reporting.
            print "stat error $file: $!\n" unless $!{ENOENT};
            next;
        }

        if (!defined $srcname) {
            ($dev, $inode, $srcname) = (@sd[0, 1], $file);
            next;
        }

        # Inode numbers are only unique per device, and link() cannot
        # cross devices, so files elsewhere are left alone.
        next if $sd[0] != $dev;
        next if $sd[1] == $inode;

        push @others, $file;
    }

    return unless defined $srcname;
    return ($srcname, @others);
}

# Replace $file with a hard link to $srcname.  The link is created under a
# temporary name and renamed into place so $file never disappears.
sub _replace_with_link {
    my ($srcname, $file) = @_;

    my $tmpfile = $file . "tmp";

    # If the link was not created, $tmpfile is either absent or a stale
    # file we did not make; renaming it over $file could destroy a message.
    unless (link($srcname, $tmpfile)) {
        print "link error $tmpfile: $!\n";
        return;
    }

    # Best effort, as before -- but no longer silent.
    print "chown error $tmpfile: $!\n" unless chown($uid, $gid, $tmpfile);
    print "chmod error $tmpfile: $!\n" unless chmod(0600, $tmpfile);

    unless (rename($tmpfile, $file)) {
        print "rename error $file: $!\n";
        unlink($tmpfile);    # do not leave our temporary link behind
    }

    return;
}
---- Cyrus Home Page: http://www.cyrusimap.org/ List Archives/Info: http://lists.andrew.cmu.edu/pipermail/info-cyrus/