Re: De-duping attachments

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Sep 15, 2010 at 05:24:11PM +0100, Gavin McCullagh wrote:
> Hi,
> 
> On Wed, 15 Sep 2010, Nik Conwell wrote:
> 
> > Isn't the easy hack for dedup just looking at the above md5 files and 
> > then doing appropriate hard links?  This could be done by a nightly 
> > trawl of the spool space.  A bigger win would be to separate the headers 
> > from the messages but that's a lot more work.
> 
> For what it's worth, I believe the fsdup tool which is part of fslint will
> do this for you.
> 
> 	http://www.pixelbeat.org/fslint/

Or this lovely little toy.  It uses the fact that in current versions of
Cyrus the "GUID" field is actually the sha1 of the underlying file.

Bron ( warning: may contain FastMail specific assumptions )
#!/usr/bin/perl -w

# SETUP {{{
use strict;
use warnings;
BEGIN { do "/home/mod_perl/hm/ME/FindLibs.pm"; }
use Date::Manip;
use MailApp::Admin::Actions;
use IO::File;
use ME::Machine;
use Cyrus::HeaderFile;
use Data::Dumper;
use Cyrus::IndexFile;
use Getopt::Std;
use Digest::SHA1;
use ME::CyrusBackup;
use ME::User;
use Data::Dumper;
# }}}

# Optional first command-line argument: only the slot with this name is
# processed. With no argument every slot is processed.
my $sn = shift;

# Numeric uid/gid of the 'cyrus' account; process() passes them to chown().
my (undef,undef,$uid,$gid) = getpwnam('cyrus');
# getpwnam returns an empty list when the account is unknown. Carrying on
# would pass undef to chown(), which numifies to 0 and would hand the files
# to root, so stop here instead.
die "Cannot look up user 'cyrus'\n" unless defined $uid and defined $gid;

foreach my $Slot (ME::Machine->ImapSlots()) {
  next if ($sn and $sn ne $Slot->Name());
  my $users = $Slot->AllMailboxes();
  my $conf = $Slot->ImapdConf();
  # $users maps each user name to the value handed to process() as its
  # folder list; users are visited in sorted order.
  foreach my $user (sort keys %$users) {
    process($conf, $user, $users->{$user});
  }
}

# process($conf, $user, $folders)
#
# Replace duplicate message files of one user with hard links to a single
# copy. Messages are considered duplicates when their index records carry
# the same MessageGuid.
#
#   $conf    - object whose GetUserLocation() resolves the 'meta' and 'spool'
#              directories of a folder
#   $user    - user name (printed as a progress line)
#   $folders - array ref of the user's folder names
#
# Dies if a folder's cyrus.index cannot be opened. Per-file problems (stat,
# link, chown, chmod, rename) are reported on STDOUT and that file is skipped.
# Relies on the file-level $uid/$gid for the ownership of the linked files.
sub process {
  my ($conf, $user, $folders) = @_;
  print "$user\n";

  # GUID => list of [folder, uid] pairs, one per index record with that GUID.
  my %ihave;
  foreach my $folder (@$folders) {
    my $meta = $conf->GetUserLocation('meta', $user, 'default', $folder);
    my $index = Cyrus::IndexFile->new_file("$meta/cyrus.index") || die "Failed to open $meta/cyrus.index";
    while (my $record = $index->next_record()) {
      push @{$ihave{$record->{MessageGuid}}}, [$folder, $record->{Uid}];
    }
  }

  foreach my $guid (keys %ihave) {
    next if @{$ihave{$guid}} <= 1;

    # The first file that can be stat'ed becomes the link source; every
    # later file that is not already the same inode is queued in @others.
    my ($dev, $inode, $size, $srcname);
    my @others;
    foreach my $item (@{$ihave{$guid}}) {
      my $spool = $conf->GetUserLocation('spool', $user, 'default', $item->[0]);
      $spool =~ s{/$}{};
      my $file = "$spool/$item->[1].";
      my @sd = stat($file);
      unless (@sd) {
        # Never queue a file that is not there: linking would create a
        # message file that did not exist before.
        print "stat error $file: $!\n";
        next;
      }
      if (defined $srcname) {
        # Inode numbers are only unique per device, so compare both.
        next if $sd[0] == $dev and $sd[1] == $inode;
        # Files with the same GUID are expected to be identical; a size
        # difference means that expectation does not hold, so leave it alone.
        if ($sd[7] != $size) {
          print "size mismatch $file ($srcname)\n";
          next;
        }
        push @others, $file;
      }
      else {
        ($dev, $inode, $size) = @sd[0, 1, 7];
        $srcname = $file;
      }
    }
    next unless @others;
    print "fixing up files for $guid ($srcname)\n";
    foreach my $file (@others) {
      # Link under a temporary name, then rename over the original so the
      # message file is replaced in a single step.
      my $tmpfile = $file . "tmp";
      unless (link($srcname, $tmpfile)) {
        # Do not fall through: if a stale temp file exists, renaming it
        # over $file would replace the message with unknown content.
        print "link error $tmpfile: $!\n";
        next;
      }
      print "chown error $tmpfile: $!\n" unless chown($uid, $gid, $tmpfile);
      print "chmod error $tmpfile: $!\n" unless chmod(0600, $tmpfile);
      unless (rename($tmpfile, $file)) {
        print "rename error $file: $!\n";
        # Do not leave the temporary link behind in the spool directory.
        unlink($tmpfile);
      }
    }
  }
}
----
Cyrus Home Page: http://www.cyrusimap.org/
List Archives/Info: http://lists.andrew.cmu.edu/pipermail/info-cyrus/

[Index of Archives]     [Cyrus SASL]     [Squirrel Mail]     [Asterisk PBX]     [Video For Linux]     [Photo]     [Yosemite News]     [gtk]     [KDE]     [Gimp on Windows]     [Steve's Art]

  Powered by Linux