On Sun, Apr 01, 2007 at 07:30:11PM -0400, Miller, Greg wrote: > Hello, > > > > I found myself in the situation where I need to do the opposite of a > reconstruct on my mailboxes. That is, instead of rebuilding the cyrus.* > files to reflect the messages on the file system, I want to query the > cyrus.* files to see which message files are indexed, and then delete > any extraneous message files on the file system. > > > > My first step is to find a utility that can dump cyrus.index so that I > can determine which message files are valid, and therefore should not be > deleted. This isn't a utility as such, but still: #!/usr/bin/perl require 'IndexFile.pm'; my $path = shift; my $index = Cyrus::IndexFile->new_file($path); while (my $record = $index->next_record()) { print "$record->{Uid}\n"; } I've written enough little tools to unpack parts of the index file that I got sick of it and just wrote something that can pack and unpack the whole thing! Yay for evil little perl utilities. It's not safe against index file format changes, but it's quite extensible in that case, just add some more formats to the start of the file :) Bron.
#!/usr/bin/perl -c

# Package to handle Cyrus Index files (version 9 only)
#
# Reads a cyrus.index file sequentially: a fixed-size header followed by
# fixed-size records.  Every field is decoded/encoded according to the
# per-version tables below, so a parsed header or record (a hashref keyed
# by field name) can be fed straight back to make_header()/make_record().

package Cyrus::IndexFile;

use strict;
use warnings;

use IO::File;
use IO::Handle;
use File::Temp;
use Data::Dumper;

# Set up header and record formatting information {{{

# Keyed by the index minor version.  Each field line is "Name Type Size",
# with Size in bytes; the sizes must add up to HeaderSize / RecordSize.
# NOTE: "HigestModseq" is misspelled, but it is a hash key that callers
# see, so it is left alone.
my $VersionFormats = {
    9 => {
        HeaderSize   => 96,
        HeaderFields => _make_fields(<<EOF),
Generation int32 4
Format int32 4
MinorVersion int32 4
StartOffset int32 4
RecordSize int32 4
Exists int32 4
LastAppenddate time_t 4
LastUid int32 4
QuotaUsed int64 8
Pop3LastLogin time_t 4
UidValidity int32 4
Deleted int32 4
Answered int32 4
Flagged int32 4
Options bitmap 4
LeakedCache int32 4
HigestModseq int64 8
Spare0 int32 4
Spare1 int32 4
Spare2 int32 4
Spare3 int32 4
Spare4 int32 4
EOF
        RecordSize   => 80,    # defined in file too, check it!
        RecordFields => _make_fields(<<EOF),
Uid int32 4
InternalDate time_t 4
SentDate time_t 4
Size int32 4
HeaderSize int32 4
ContentOffset int32 4
CacheOffset int32 4
LastUpdated time_t 4
SystemFlags bitmap 4
UserFlags bitmap 16
ContentLines int32 4
CacheVersion int32 4
MessageUuid hex 12
Modseq int64 8
EOF
    },
};

# Turn a "Name Type Size" table (one field per line) into an arrayref of
# [Name, Type, Size] triples.  Blank lines are skipped.
sub _make_fields {
    my $string = shift;

    my @items;
    foreach my $line (grep { m/\S/ } split /\n/, $string) {
        # split ' ' (unlike /\s+/) ignores leading whitespace, so an
        # indented table does not produce an empty first field.
        my ($Name, $Type, $Size) = split ' ', $line;
        push @items, [$Name, $Type, $Size];
    }
    return \@items;
}

# }}}

# PUBLIC API

# new($handle)
# Build a reader on an already-open (binary) file handle positioned at the
# start of the index.  Reads and parses the header immediately.
# Dies if the header cannot be read in full or the version is unsupported.
sub new {
    my $class  = shift;
    my $handle = shift;

    # The first 12 bytes are enough to find the version (third int32).
    my $buf     = _read_exact($handle, 12, 'index header');
    my $version = convert_from('int32', substr($buf, 8, 4));

    my $frm = $VersionFormats->{$version}
        or die "Unsupported cyrus.index version $version";

    my $Self = bless { version => $version, format => $frm },
        ref($class) || $class;

    $buf .= _read_exact($handle, $frm->{HeaderSize} - 12, 'index header');

    $Self->{rawheader}    = $buf;
    $Self->{header}       = $Self->parse_header($buf);
    $Self->{records_done} = 0;
    $Self->{handle}       = $handle;

    return $Self;
}

# new_file($file)
# Open $file read-only and return a reader on it.  Dies if the file cannot
# be opened (or, via new(), if its header is unusable).
sub new_file {
    my $class = shift;
    my $file  = shift;

    # Explicit mode argument: the filename is never parsed for mode
    # characters or stripped of whitespace.
    my $fh = IO::File->new($file, 'r')
        || die "Can't open $file for read: $!";
    binmode($fh);

    return $class->new($fh);
}

# header([$Field])
# Return one parsed header field, or the whole header hashref when no
# field name is given.
sub header {
    my $Self  = shift;
    my $Field = shift;

    return $Self->{header}{$Field} if $Field;
    return $Self->{header};
}

# next_record()
# Read and parse the next record.  Returns the record hashref, or undef
# once the header's 'Exists' count of records has been consumed.
# Dies on a read error or a truncated record.
sub next_record {
    my $Self = shift;

    my $RecordSize = $Self->header('RecordSize');
    my $Exists     = $Self->header('Exists');

    if ($Self->{records_done} >= $Exists) {
        delete $Self->{rawrecord};
        delete $Self->{record};
        # 'return undef' (not bare return) is kept deliberately so that
        # list-context callers keep seeing a single undef.
        return undef;    # no more records!
    }

    my $buf = _read_exact($Self->{handle}, $RecordSize,
        'index record ' . ($Self->{records_done} + 1));
    $Self->{records_done}++;

    my $rec = $Self->parse_record($buf);
    $Self->{rawrecord} = $buf;
    $Self->{record}    = $rec;
    return $rec;
}

# record([$Field])
# Return one field of the most recently read record, or the whole record
# hashref when no field name is given.  Returns undef if there is no
# current record.
sub record {
    my $Self  = shift;
    my $Field = shift;

    return undef unless ($Self->{record});
    return $Self->{record}{$Field} if $Field;
    return $Self->{record};
}

# write_header($fh, $header)
# Encode the header hashref and write it to $fh.  Returns the number of
# bytes written; dies if encoding or writing fails.
sub write_header {
    my $Self   = shift;
    my $fh     = shift;
    my $header = shift;

    return _write_exact($fh, $Self->make_header($header), 'index header');
}

# write_record($fh, $record)
# Encode the record hashref and write it to $fh.  Returns the number of
# bytes written; dies if encoding or writing fails.
sub write_record {
    my $Self   = shift;
    my $fh     = shift;
    my $record = shift;

    return _write_exact($fh, $Self->make_record($record), 'index record');
}

# SOMEWHAT INTERNAL METHODS

# make_header($ds)
# Encode a header hashref into its on-disk form.  Dies if any field, or
# the header as a whole, does not come out at the expected length.
sub make_header {
    my $Self = shift;
    my $ds   = shift;

    return _pack_fields($Self->{format}{HeaderFields}, $ds,
        $Self->{format}{HeaderSize}, 'Header length mismatch! ');
}

# make_record($ds)
# Encode a record hashref into its on-disk form.  Dies if any field, or
# the record as a whole, does not come out at the expected length.
sub make_record {
    my $Self = shift;
    my $ds   = shift;

    return _pack_fields($Self->{format}{RecordFields}, $ds,
        $Self->{format}{RecordSize}, 'Record length mismatch! ');
}

# parse_header($buf)
# Decode a raw header buffer into a hashref keyed by field name.
sub parse_header {
    my $Self = shift;
    my $buf  = shift;

    return _unpack_fields($Self->{format}{HeaderFields}, $buf);
}

# parse_record($buf)
# Decode a raw record buffer into a hashref keyed by field name.
# Dies unless the buffer is exactly one record long.
sub parse_record {
    my $Self = shift;
    my $buf  = shift;

    # this checks that the parsed size matches the expected size :)
    unless (length($buf) == $Self->{format}{RecordSize}) {
        die "Mismatched record length\n";
    }

    return _unpack_fields($Self->{format}{RecordFields}, $buf);
}

# convert_from($format, $item)
# Decode one raw field.  int32/time_t/int64 become numbers (big-endian on
# disk), bitmap becomes a string of '0'/'1' characters, hex becomes a
# lowercase hex string.  Dies on an unknown field type.
sub convert_from {
    my $format = shift;
    my $item   = shift;

    if ($format eq 'int32' or $format eq 'time_t') {
        return unpack('N', $item);
    }
    elsif ($format eq 'int64') {
        # Combine both 32-bit words instead of dropping the high one, so
        # values of 4GiB and above survive.
        my ($hi, $lo) = unpack('NN', $item);
        return $hi * 4294967296 + $lo;
    }
    elsif ($format eq 'bitmap') {
        return unpack('B*', $item);
    }
    elsif ($format eq 'hex') {
        return unpack('H*', $item);
    }

    die "Unknown field type '$format'";
}

# convert_to($format, $item)
# Encode one field value; the exact inverse of convert_from().
# Dies on an unknown field type.
sub convert_to {
    my $format = shift;
    my $item   = shift;

    if ($format eq 'int32' or $format eq 'time_t') {
        return pack('N', $item);
    }
    elsif ($format eq 'int64') {
        # Split with integer-exact arithmetic: take the low word first,
        # then divide what is left (an exact multiple of 2**32).
        my $lo = $item % 4294967296;
        my $hi = ($item - $lo) / 4294967296;
        return pack('NN', $hi, $lo);
    }
    elsif ($format eq 'bitmap') {
        return pack('B*', $item);
    }
    elsif ($format eq 'hex') {
        return pack('H*', $item);
    }

    die "Unknown field type '$format'";
}

# PRIVATE HELPERS

# Read exactly $want bytes from $handle, retrying partial reads.
# Dies on a read error or if end of file arrives first.
sub _read_exact {
    my ($handle, $want, $what) = @_;

    my $buf = '';
    while (length($buf) < $want) {
        my $got = sysread($handle, $buf, $want - length($buf), length($buf));
        die "Error reading $what: $!" unless defined $got;
        last if $got == 0;    # end of file
    }

    unless (length($buf) == $want) {
        die "Short read on $what: wanted $want bytes, got " . length($buf);
    }
    return $buf;
}

# Write all of $buf to $fh, retrying partial writes.
# Returns the number of bytes written; dies on failure.
sub _write_exact {
    my ($fh, $buf, $what) = @_;

    my $total = length($buf);
    my $done  = 0;
    while ($done < $total) {
        my $n = syswrite($fh, $buf, $total - $done, $done);
        die "Can't write $what: $!" unless defined $n;
        die "Can't write $what: no progress" if $n == 0;
        $done += $n;
    }
    return $done;
}

# Encode every field of $ds according to $fields and join the results.
# Dies if a field or the total is not the expected number of bytes.
sub _pack_fields {
    my ($fields, $ds, $expect, $mismatch) = @_;

    my @parts;
    foreach my $item (@$fields) {
        my ($name, $type, $size) = @$item;
        my $res = convert_to($type, $ds->{$name});
        unless (length($res) == $size) {    # bytes
            my $shown = defined $ds->{$name} ? $ds->{$name} : 'undef';
            die "Incorrect length for $name ($shown): got "
                . length($res) . " bytes, expected $size";
        }
        push @parts, $res;
    }

    my $res = join('', @parts);
    unless (length($res) == $expect) {
        die $mismatch . length($res);
    }
    return $res;
}

# Walk $buf field by field according to $fields and decode each one.
sub _unpack_fields {
    my ($fields, $buf) = @_;

    my $base = 0;
    my %res;
    foreach my $item (@$fields) {
        my ($name, $type, $size) = @$item;
        $res{$name} = convert_from($type, substr($buf, $base, $size));
        $base += $size;
    }
    return \%res;
}

1;
---- Cyrus Home Page: http://cyrusimap.web.cmu.edu/ Cyrus Wiki/FAQ: http://cyrusimap.web.cmu.edu/twiki List Archives/Info: http://asg.web.cmu.edu/cyrus/mailing-list.html