Re: Plain text dump of cyrus.index?

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Sun, Apr 01, 2007 at 07:30:11PM -0400, Miller, Greg wrote:
> Hello,
> 
>  
> 
> I found myself in the situation where I need to do the opposite of a
> reconstruct on my mailboxes. That is, instead of rebuilding the cyrus.*
> files to reflect the messages on the file system, I want to query the
> cyrus.* files to see which message files are indexed, and then delete
> any extraneous message files on the file system.
> 
>  
> 
> My first step is to find a utility that can dump cyrus.index so that I
> can determine which message files are valid, and therefore should not be
> deleted.

This isn't a utility as such, but still:

#!/usr/bin/perl

# Print the UID of every record in a Cyrus index file, one per line.
#
# Usage: pass the path of the index file as the only argument.

use strict;
use warnings;

# Loaded at run time through @INC; expected to define Cyrus::IndexFile.
require 'IndexFile.pm';

my $path = shift;

# Fail with a usage message instead of handing an undefined path to
# new_file() and getting a confusing open error.
die "Usage: $0 <path-to-index-file>\n"
  unless defined $path && length $path;

my $index = Cyrus::IndexFile->new_file($path);

# Loop until next_record() stops returning a record.
while (my $record = $index->next_record()) {
  print "$record->{Uid}\n";
}

I've written enough little tools to unpack parts of the index file
that I got sick of it and just wrote something that can pack and
unpack the whole thing!  Yay for evil little perl utilities.  It's
not safe against index file format changes, but it's quite extensible
in that case, just add some more formats to the start of the file :)

Bron.
#!/usr/bin/perl -c

# Package to handle Cyrus Index files (version 9 only)

package Cyrus::IndexFile;

use strict;
use warnings;

use IO::File;
use IO::Handle;
use File::Temp;
use Data::Dumper;

# Set up header and record formatting information {{{

# Layout tables, keyed by the version number that new() reads from bytes
# 8-11 of the file header.
#
# Each version entry holds:
#   HeaderSize   - total header length in bytes
#   HeaderFields - ordered field list for the header
#   RecordSize   - length in bytes of one record
#   RecordFields - ordered field list for a record
#
# The field lists are written as heredoc tables, one field per line:
#   Name  Type  Size-in-bytes
# and are turned into [Name, Type, Size] triples by _make_fields().
# The Type column selects the branch used by convert_from()/convert_to()
# (int32, time_t, int64, bitmap, hex).  The Name column becomes the hash
# key in parsed headers/records, so names are used exactly as spelled
# here (including "HigestModseq").
my $VersionFormats = {
  9 => {
    HeaderSize => 96,
    HeaderFields => _make_fields(<<EOF),
Generation            int32  4
Format                int32  4
MinorVersion          int32  4
StartOffset           int32  4
RecordSize            int32  4
Exists                int32  4
LastAppenddate        time_t 4
LastUid               int32  4
QuotaUsed             int64  8
Pop3LastLogin         time_t 4
UidValidity           int32  4
Deleted               int32  4
Answered              int32  4
Flagged               int32  4
Options               bitmap 4
LeakedCache           int32  4
HigestModseq          int64  8
Spare0                int32  4
Spare1                int32  4
Spare2                int32  4
Spare3                int32  4
Spare4                int32  4
EOF
    # The header carries its own RecordSize field as well; next_record()
    # reads that many bytes and parse_record() dies unless the result is
    # exactly this long.
    RecordSize => 80, # defined in file too, check it!
    RecordFields => _make_fields(<<EOF),
Uid                   int32  4
InternalDate          time_t 4
SentDate              time_t 4
Size                  int32  4
HeaderSize            int32  4
ContentOffset         int32  4
CacheOffset           int32  4
LastUpdated           time_t 4
SystemFlags           bitmap 4
UserFlags             bitmap 16
ContentLines          int32  4
CacheVersion          int32  4
MessageUuid           hex    12
Modseq                int64  8
EOF
  },
};

# Turn a field-definition table into a list of field descriptors.
#
# $string holds one field per line, written as "Name Type Size" with the
# columns separated by whitespace.  Lines containing only whitespace are
# skipped.
#
# Returns an array ref holding one [Name, Type, Size] triple per line,
# in the order the lines appear.
sub _make_fields {
  my $string = shift;

  my @items;
  foreach my $line (split /\n/, $string) {
    next unless $line =~ m/\S/;

    # split ' ' discards leading whitespace, so an indented table line
    # does not yield an empty first column and shift every field by one.
    my ($Name, $Type, $Size) = split ' ', $line;

    push @items, [$Name, $Type, $Size];
  }

  return \@items;
}

# }}}

# PUBLIC API

# Construct a reader on an already-open index file handle.
#
# $handle must be positioned at the start of the index.  The header is
# read immediately: the first 12 bytes are enough to find the version
# number (bytes 8-11), which selects the layout in $VersionFormats; the
# remainder of the header is then read according to that layout.
#
# Returns the new object with these keys set: version, format (only for
# a known version), rawheader, header, records_done and handle.
#
# Dies if the header cannot be read in full.  For a version with no
# entry in $VersionFormats no layout is set, so only the first 12 raw
# bytes are kept and the parsed header has no fields; callers can
# inspect {version} to detect this.
sub new {
  my $class = shift;
  my $handle = shift;

  # read just enough of the header to learn the version
  my $buf = '';
  my $got = sysread($handle, $buf, 12);
  die "Can't read index header: $!" unless defined $got;
  die "Index header truncated: got $got of 12 bytes" unless $got == 12;

  my $version = convert_from('int32', substr($buf, 8));
  my $Self = bless { version => $version }, ref($class) || $class;
  if (my $frm = $VersionFormats->{$version}) {
    $Self->{format} = $frm;

    # read the rest of the header now that its size is known
    my $want = $frm->{HeaderSize} - 12;
    my $rest = '';
    $got = sysread($handle, $rest, $want);
    die "Can't read index header: $!" unless defined $got;
    die "Index header truncated: got $got of $want remaining bytes"
      unless $got == $want;
    $buf .= $rest;
  }
  $Self->{rawheader} = $buf;
  $Self->{header} = $Self->parse_header($buf);
  $Self->{records_done} = 0;
  $Self->{handle} = $handle;
  return $Self;
}

# Construct a reader for the index file at path $file.
#
# Opens the file read-only and hands the handle to new().  Dies with the
# system error if the file cannot be opened.
sub new_file {
  my $class = shift;
  my $file = shift;

  # Pass the mode separately from the name so that whitespace or mode
  # characters in $file are treated as part of the filename.
  my $fh = IO::File->new($file, '<')
    || die "Can't open $file for read: $!";

  # The index is binary data; make sure no newline translation happens
  # on platforms that distinguish text and binary files.
  binmode($fh);

  return $class->new($fh);
}

# Header accessor.
#
# Called with a true field name, returns that field's value from the
# parsed header.  Called without one, returns the hash ref holding the
# whole parsed header.
sub header {
  my ($Self, $Field) = @_;

  return $Self->{header} unless $Field;
  return $Self->{header}{$Field};
}

# Read and parse the next record from the index.
#
# Uses the header's RecordSize as the number of bytes to read and the
# header's Exists as the number of records available.  On success the
# raw bytes and the parsed hash are remembered in {rawrecord} and
# {record}, and the parsed hash ref is returned.
#
# Once Exists records have been consumed, {rawrecord} and {record} are
# cleared and undef is returned.
#
# Dies if the read fails with an I/O error, or (via parse_record) if the
# bytes read do not make up a full record.
sub next_record {
  my $Self = shift;

  my $RecordSize = $Self->header('RecordSize');
  my $Exists = $Self->header('Exists');

  if ($Self->{records_done} < $Exists) {
    my $buf = '';
    my $got = sysread($Self->{handle}, $buf, $RecordSize);

    # Report a real I/O error with its cause rather than letting it show
    # up later as a record-length mismatch.
    die "Can't read index record: $!" unless defined $got;

    $Self->{records_done}++;
    my $rec = $Self->parse_record($buf);
    $Self->{rawrecord} = $buf;
    $Self->{record} = $rec;
    return $rec;
  }
  else {
    delete $Self->{rawrecord};
    delete $Self->{record};
    return undef; # no more records!
  }
}

# Accessor for the record most recently returned by next_record().
#
# Returns undef when there is no current record.  Otherwise returns the
# named field's value when a true field name is given, or the whole
# parsed record hash ref when it is not.
sub record {
  my ($Self, $Field) = @_;

  my $current = $Self->{record};
  return undef unless $current;

  return $Field ? $current->{$Field} : $current;
}

# Pack the header hash $header with make_header() and write the bytes to
# $fh using syswrite.  Returns whatever syswrite returns.
sub write_header {
  my ($Self, $fh, $header) = @_;

  my $packed = $Self->make_header($header);
  return syswrite($fh, $packed);
}

# Pack the record hash $record with make_record() and write the bytes to
# $fh using syswrite.  Returns whatever syswrite returns.
sub write_record {
  my ($Self, $fh, $record) = @_;

  my $packed = $Self->make_record($record);
  return syswrite($fh, $packed);
}

# SOMEWHAT INTERNAL METHODS

# Pack a header hash into its on-disk byte string.
#
# $ds maps header field names to values.  Fields are converted with
# convert_to() in the order given by the format's HeaderFields list and
# concatenated.
#
# Dies if any single field does not pack to its declared size, or if
# the total does not equal the format's HeaderSize.  Returns the packed
# string.
sub make_header {
  my ($Self, $ds) = @_;

  my $packed = '';
  foreach my $field (@{$Self->{format}{HeaderFields}}) {
    my ($name, $type, $size) = @$field;

    my $bytes = convert_to($type, $ds->{$name});
    if (length($bytes) != $size) {
      # bytes?
      die "Incorrect length for $name  ($ds->{$name}) ($bytes)" . length($bytes);
    }
    $packed .= $bytes;
  }

  # the pieces together must fill the header exactly
  if (length($packed) != $Self->{format}{HeaderSize}) {
    die "Header length mismatch! " . length($packed);
  }
  return $packed;
}

# Pack a record hash into its on-disk byte string.
#
# $ds maps record field names to values.  Fields are converted with
# convert_to() in the order given by the format's RecordFields list and
# concatenated.
#
# Dies if any single field does not pack to its declared size, or if
# the total does not equal the format's RecordSize.  Returns the packed
# string.
sub make_record {
  my $Self = shift;
  my $ds = shift;

  my @parts;
  foreach my $item (@{$Self->{format}{RecordFields}}) {
    my ($name, $type, $size) = @$item;
    my $res = convert_to($type, $ds->{$name});

    # Check every field on its own, as make_header does: two wrong-sized
    # fields could otherwise cancel out, pass the total-length check
    # below, and shift the fields that follow.
    my $len = defined($res) ? length($res) : 0;
    unless ($len == $size) {
      die "Incorrect length for $name: packed to $len bytes, expected $size";
    }
    push @parts, $res;
  }

  my $res =  join('', @parts);
  unless (length($res) == $Self->{format}{RecordSize}) {
    die "Record length mismatch!";
  }
  return $res;
}

# Unpack a raw header string into a hash of field values.
#
# Walks the format's HeaderFields list in order, slicing each field's
# bytes out of $buf at a running offset and decoding them with
# convert_from().  Returns a hash ref keyed by field name.
sub parse_header {
  my ($Self, $buf) = @_;

  my %parsed;
  my $offset = 0;
  foreach my $field (@{$Self->{format}{HeaderFields}}) {
    my ($name, $type, $size) = @$field;

    my $chunk = substr($buf, $offset, $size);
    $offset += $size;
    $parsed{$name} = convert_from($type, $chunk);
  }

  return \%parsed;
}

# Unpack a raw record string into a hash of field values.
#
# Dies unless $buf is exactly as long as the format's RecordSize.  Then
# walks the format's RecordFields list in order, slicing each field's
# bytes out of $buf at a running offset and decoding them with
# convert_from().  Returns a hash ref keyed by field name.
sub parse_record {
  my ($Self, $buf) = @_;

  # refuse anything that is not exactly one record long
  if (length($buf) != $Self->{format}{RecordSize}) {
    die "Mismatched record length\n";
  }

  my %parsed;
  my $offset = 0;
  foreach my $field (@{$Self->{format}{RecordFields}}) {
    my ($name, $type, $size) = @$field;

    my $chunk = substr($buf, $offset, $size);
    $offset += $size;
    $parsed{$name} = convert_from($type, $chunk);
  }

  return \%parsed;
}

# Decode the raw bytes of one field into a Perl value.
#
# $format is the field type from the layout table, $item the raw bytes:
#   int32, time_t - 32-bit unsigned big-endian integer
#   int64         - 64-bit unsigned big-endian integer
#   bitmap        - string of '0'/'1' characters, one per bit
#   hex           - string of lowercase hex digits, two per byte
#
# Returns the decoded value.  Dies on an unknown $format.
sub convert_from {
  my $format = shift;
  my $item = shift;
  if ($format eq 'int32' or $format eq 'time_t') {
    return unpack('N', $item);
  }
  elsif ($format eq 'int64') {
    # Combine both 32-bit halves; dropping the high half would silently
    # truncate any value of 2**32 or more.  Done arithmetically rather
    # than with a 'Q' template so it also works on perls without native
    # 64-bit integers (where very large values lose precision instead
    # of failing).
    my ($hi, $lo) = unpack('NN', $item);
    return unless defined $lo; # fewer than 8 bytes supplied
    return $hi * 4294967296 + $lo;
  }
  elsif ($format eq 'bitmap') {
    return unpack('B*', $item);
  }
  elsif ($format eq 'hex') {
    return unpack('H*', $item);
  }

  # A mistyped type in a layout table would otherwise decode to an empty
  # string without any indication.
  die "Unknown field format '$format'";
}

# Encode a Perl value as the raw bytes of one field.
#
# $format is the field type from the layout table, $item the value:
#   int32, time_t - packed as a 32-bit unsigned big-endian integer
#   int64         - packed as a 64-bit unsigned big-endian integer
#   bitmap        - string of '0'/'1' characters, packed one bit each
#   hex           - string of hex digits, packed two per byte
#
# Returns the packed byte string.  Dies on an unknown $format.
sub convert_to {
  my $format = shift;
  my $item = shift;
  if ($format eq 'int32' or $format eq 'time_t') {
    return pack('N', $item);
  }
  elsif ($format eq 'int64') {
    # Split into two 32-bit halves rather than writing a zero high half,
    # which would truncate any value of 2**32 or more.  Taking the
    # remainder first keeps the division exact.
    my $lo = $item % 4294967296;
    my $hi = ($item - $lo) / 4294967296;
    return pack('NN', $hi, $lo);
  }
  elsif ($format eq 'bitmap') {
    return pack('B*', $item);
  }
  elsif ($format eq 'hex') {
    return pack('H*', $item);
  }

  # A mistyped type in a layout table would otherwise encode to an empty
  # string without any indication.
  die "Unknown field format '$format'";
}
----
Cyrus Home Page: http://cyrusimap.web.cmu.edu/
Cyrus Wiki/FAQ: http://cyrusimap.web.cmu.edu/twiki
List Archives/Info: http://asg.web.cmu.edu/cyrus/mailing-list.html

[Index of Archives]     [Cyrus SASL]     [Squirrel Mail]     [Asterisk PBX]     [Video For Linux]     [Photo]     [Yosemite News]     [gtk]     [KDE]     [Gimp on Windows]     [Steve's Art]

  Powered by Linux