diff options
| author | Guilhem Moulin <guilhem@fripost.org> | 2015-09-05 17:39:53 +0200 | 
|---|---|---|
| committer | Guilhem Moulin <guilhem@fripost.org> | 2015-09-05 17:39:53 +0200 | 
| commit | f3a94832cb19e9aad7f0d6060ac6fee873b05ac2 (patch) | |
| tree | 6270f95fd89714f75e1925cfe1820715547f7fe0 | |
| parent | 8566f8140d92380157ceaef1fed0536e23610d7e (diff) | |
| parent | f5b7ec21e9d53ac7d93797a83c3c100dfa4de076 (diff) | |
Merge branch 'master' into debian
| -rw-r--r-- | INSTALL | 20 | ||||
| -rw-r--r-- | README | 59 | ||||
| -rwxr-xr-x | imapsync | 73 | ||||
| -rw-r--r-- | imapsync.1 | 10 | ||||
| -rw-r--r-- | lib/Net/IMAP/Sync.pm | 31 | 
5 files changed, 171 insertions, 22 deletions
| @@ -0,0 +1,20 @@ +imapsync depends on the following Perl modules: + +  - Config::Tiny +  - DBI +  - DBD::SQLite +  - Getopt::Long +  - MIME::Base64 (core module) if authentication is required +  - IO::Select (core module) +  - IO::Socket::INET (core module) for 'type=imap' +  - IO::Socket::SSL for 'type=imaps' (or 'type=imap' and 'STARTTLS=YES') +  - IPC::Open2 (core module) for 'type=tunnel' +  - List::Util (core module) +  - POSIX (core module) if 'logfile' is set +  - Socket (core module) +  - Time::HiRes (core module) if 'logfile' is set + +On Debian GNU/Linux systems, these modules can be installed with the +following command: + +  apt-get install libconfig-tiny-perl libdbi-perl libdbd-sqlite3-perl libio-socket-ssl-perl @@ -0,0 +1,59 @@ +imapsync performs stateful synchronization between two IMAP4rev1 +servers.  Such synchronization is made possible by the QRESYNC extension +from [RFC7162]; for convenience reasons servers must also support +LIST-EXTENDED [RFC5258], LIST-STATUS [RFC5819] and UIDPLUS [RFC4315]. +Furthermore, while imapsync can work with servers lacking support for +LITERAL+ [RFC2088] and MULTIAPPEND [RFC3502], these extensions greatly +improve performance by reducing the number of required round trips hence +are recommended. + + +Stateful synchronization is only possible for mailboxes supporting +persistent message Unique Identifiers (UID) and persistent storage of +mod-sequences (MODSEQ); any non-compliant mailbox will cause imapsync to +abort.  Furthermore, because UIDs are allocated not by the client but by +the server, imapsync needs to keep track of associations between local +and remote UIDs for each mailbox.  The synchronization state of a +mailbox consists of its UIDNEXT and HIGHESTMODSEQ values on each server; +it is then assumed that each message with UID < $UIDNEXT has been +replicated to the other server, and that the metadata (such as flags) of +each message with MODSEQ <= $HIGHESTMODSEQ has been synchronized. 
+Conceptually, the synchronization algorithm is derived from [RFC4549] +with the [RFC7162, section 6] amendments, and works as follows: + + + 1. SELECT (on both servers) a mailbox the current UIDNEXT or +    HIGHESTMODSEQ values of which differ from the values found in the +    database (for either server).  Use the QRESYNC SELECT parameter from +    [RFC7162] to list changes (vanished messages and flag updates) since +    $HIGHESTMODSEQ to messages with UID<$UIDNEXT. + + 2. Propagate these changes onto the other server: get the corresponding +    UIDs from the database, then a/ issue an UID STORE + UID EXPUNGE +    command to remove messages that have not already been deleted on +    both servers, and b/ issue UID STORE commands to propagate flag +    updates (send a single command for each flag list in order to +    reduce the number of round trips).  (Conflicts may occur if the +    metadata of a message has been updated on both servers with +    different flag lists; in that case imapsync issues a warning and +    updates the message on each server with the union of both flag +    lists.)  Repeat this step if the server sent some updates in the +    meantime.  Otherwise, update the HIGHESTMODSEQ value in the +    database. + + 3. Process new messages (if the current UIDNEXT value differs from the +    one found in the database) by issuing an UID FETCH command and for +    each message RFC822 body received, issue an APPEND command to the +    other server on-the-fly.  Repeat this step if the server received +    new messages in the meantime.  Otherwise, update the UIDNEXT value +    in the database.  Go back to step 2 if the server sent some updates +    in the meantime. + + 4. Go back to step 1 to proceed with the next unsynchronized mailbox. + + +Consult the manual for more information. + +imapsync is Copyright© 2015 Guilhem Moulin ⟨guilhem@fripost.org⟩, and +licensed for use under the GNU General Public License version 3 or +later.  
See ‘COPYING’ for specific terms and distribution information. @@ -559,6 +559,13 @@ my $STH_GET_INTERRUPTED_BY_IDX = $DBH->prepare(q{      WHERE m.idx = ? AND (lUID >= l.UIDNEXT OR rUID >= r.UIDNEXT)  }); +# Count messages +my $STH_COUNT_MESSAGES = $DBH->prepare(q{SELECT COUNT(*) FROM mapping WHERE idx = ?}); + +# List last 1024 messages UIDs +my $STH_LASTUIDs_LOCAL  = $DBH->prepare(q{SELECT rUID FROM mapping WHERE idx = ? ORDER BY rUID DESC LIMIT 1024}); +my $STH_LASTUIDs_REMOTE = $DBH->prepare(q{SELECT lUID FROM mapping WHERE idx = ? ORDER BY lUID DESC LIMIT 1024}); +  # Download some missing UIDs from $source; returns the newly allocated UIDs  sub download_missing($$$@) { @@ -612,6 +619,64 @@ sub delete_mapping($$) {  } +# Create a sample (UIDs, sequence numbers) to use as 3rd and 4th +# argument of the QRESYNC parameters to the SELECT command. +# QRESYNC [RFC7162] doesn't force the server to remember the MODSEQs of +# EXPUNGEd messages.  By passing a sample of known UIDs/sequence numbers +# we let the server know that the messages have been EXPUNGEd [RFC7162, +# section 3.2.5.2]. +# The UID set is the largest set of highest UIDs with at most 1024 UIDs, +# of length (after compacting) at most 64. +# The reason why we sample with the highest UIDs is that lowest UIDs are +# less likely to be deleted. +sub sample($$$) { +    my ($idx, $count, $sth) = @_; +    return unless $count > 0; + +    my ($n, $uids, $min, $max); +    $sth->execute($idx); +    while (defined (my $row = $sth->fetchrow_arrayref())) { +        my $k = $row->[0]; +        if (!defined $min and !defined $max) { +            $n = 0; +            $min = $max = $k; +        } +        elsif ($k == $min - 1) { +            $min--; +        } +        else { +            $n += $max - $min + 1; +            $uids = ($min == $max ? $min : "$min:$max") +                   .(defined $uids ? 
','.$uids : ''); +            $min = $max = $k; +            if (length($uids) > 64) { +                $sth->finish(); # done with the statement +                last; +            } +        } +    } +    if (!defined $uids or length($uids) <= 64) { +        $n += $max - $min + 1; +        $uids = ($min == $max ? $min : "$min:$max") +               .(defined $uids ? ','.$uids : ''); +    } +    return ( $uids, ($count - $n + 1).':'.$count ); +} + + +# Issue a SELECT command with the given $mailbox. +sub select_mbx($$) { +    my ($idx, $mailbox) = @_; + +    $STH_COUNT_MESSAGES->execute($idx); +    my ($count) = $STH_COUNT_MESSAGES->fetchrow_array(); +    die if defined $STH_COUNT_MESSAGES->fetch(); # sanity check + +    $lIMAP->select($mailbox, sample($idx, $count, $STH_LASTUIDs_LOCAL)); +    $rIMAP->select($mailbox, sample($idx, $count, $STH_LASTUIDs_REMOTE)); +} + +  # Check and repair synchronization of a mailbox between the two servers  # (in a very crude way, by downloading all existing UID with their flags)  sub repair($) { @@ -622,8 +687,7 @@ sub repair($) {      die if defined $STH_GET_INDEX->fetch(); # sanity check      return unless defined $idx; # not in the database -    $lIMAP->select($mailbox); -    $rIMAP->select($mailbox); +    select_mbx($idx, $mailbox);      $STH_GET_CACHE_BY_IDX->execute($idx);      my $cache = $STH_GET_CACHE_BY_IDX->fetchrow_hashref() // return; # no cache @@ -1005,7 +1069,7 @@ sub wait_notifications(;$) {  ############################################################################# -# Resume interrupted mailbox syncs. +# Resume interrupted mailbox syncs (before initializing the cache).  
#  my ($MAILBOX, $IDX);  $STH_LIST_INTERRUPTED->execute(); @@ -1101,8 +1165,7 @@ while(1) {              die if defined $STH_GET_INDEX->fetch(); # sanity check              die unless defined $IDX; # sanity check; -            $lIMAP->select($MAILBOX); -            $rIMAP->select($MAILBOX); +            select_mbx($IDX, $MAILBOX);              if (!$KNOWN_INDEXES{$IDX}) {                  $STH_INSERT_LOCAL->execute( $IDX, $lIMAP->uidvalidity($MAILBOX)); @@ -47,7 +47,7 @@ messages with UID<$UIDNEXT.  Propagate these changes onto the other server: get the corresponding  UIDs from the database, then a/ issue an UID STORE + UID EXPUNGE command  to remove messages that have not already been deleted on both servers, -and /b issue UID STORE commands to propagate flag updates (send a single +and b/ issue UID STORE commands to propagate flag updates (send a single  command for each flag list in order to reduce the number of round  trips).  (Conflicts may occur if the metadata of a message has been updated on @@ -55,7 +55,7 @@ both servers with different flag lists; in that case \fBimapsync\fR  issues a warning and updates the message on each server with the union  of both flag lists.)  Repeat this step if the server sent some updates in the meantime. -Otherwise, update the HIGHESTMODSEQ values in the database. +Otherwise, update the HIGHESTMODSEQ value in the database.  .IP \n+[step].  Process new messages (if the current UIDNEXT value differs from the one @@ -63,7 +63,7 @@ found in the database) by issuing an UID FETCH command and for each  message RFC822 body received, issue an APPEND command to the other  server on\-the\-fly.  Repeat this step if the server received new messages in the meantime. -Otherwise, update the UIDNEXT values in the database. +Otherwise, update the UIDNEXT value in the database.  Go back to step 2 if the server sent some updates in the meantime.  .IP \n+[step]. @@ -257,7 +257,7 @@ on its standard output, and understand it on its standard input.  
.TP  .I STARTTLS -Whether to use the \(lqSTARTTLS\(rq directive to upgrade a secure +Whether to use the \(lqSTARTTLS\(rq directive to upgrade to a secure  connection.  Setting this to \(lqYES\(rq for a server not advertising  the \(lqSTARTTLS\(rq capability causes \fBimapsync\fR to immediately  abort the connection. @@ -312,7 +312,7 @@ empty database will duplicate each message due to the absence of  local/remote UID association.  .IP \[bu]  \fBimapsync\fR is single threaded and doesn't use IMAP command -pipelining.  Synchronization could be boosted by sending independent +pipelining.  Synchronization could be boosted up by sending independent  commands (such as the initial LIST/STATUS command) to each server in  parallel, and for a given server, by sending independent commands (such  as flag updates) in a pipeline. diff --git a/lib/Net/IMAP/Sync.pm b/lib/Net/IMAP/Sync.pm index 85ca487..ca85a54 100644 --- a/lib/Net/IMAP/Sync.pm +++ b/lib/Net/IMAP/Sync.pm @@ -482,19 +482,21 @@ sub search($$) {  } -# $self->select($mailbox) -# $self->examine($mailbox) +# $self->select($mailbox,  [$UIDs, $seqs]) +# $self->examine($mailbox, [$UIDs, $seqs])  #   Issue a SELECT or EXAMINE command for the $mailbox. Upon success,  #   change the state to SELECTED, otherwise go back to AUTH. -sub select($$) { +#   The optional $UIDs and $seqs are passed as 3rd and 4th arguments to +#   the QRESYNC parameter, respectively. 
+sub select($$;$$) {      my $self = shift;      my $mailbox = shift; -    $self->_select_or_examine('SELECT', $mailbox); +    $self->_select_or_examine('SELECT', $mailbox, @_);  } -sub examine($$) { +sub examine($$;$$) {      my $self = shift;      my $mailbox = shift; -    $self->_select_or_examine('EXAMINE', $mailbox); +    $self->_select_or_examine('EXAMINE', $mailbox, @_);  } @@ -1276,13 +1278,16 @@ sub _open_mailbox($$) {  } -# $self->_select_or_examine($command, $mailbox) +# $self->_select_or_examine($command, $mailbox, [$UIDs, $seqs])  #   Issue a SELECT or EXAMINE command for the $mailbox.  Upon success,  #   change the state to SELECTED, otherwise go back to AUTH. -sub _select_or_examine($$$) { +#   The optional $UIDs and $seqs are passed as 3rd and 4th arguments to +#   the QRESYNC parameter, respectively. +sub _select_or_examine($$$;$$) {      my $self = shift;      my $command = shift;      my $mailbox = shift; +    my ($uids, $seqs) = @_;      my $pcache = $self->{_PCACHE}->{$mailbox} //= {};      my $cache = $self->{_CACHE}->{$mailbox} //= {}; @@ -1290,10 +1295,12 @@ sub _select_or_examine($$$) {      $mailbox = uc $mailbox eq 'INBOX' ? 
'INBOX' : $mailbox; # INBOX is case-insensitive      $command .= ' '.quote($mailbox); -    $command .= " (QRESYNC ($pcache->{UIDVALIDITY} $pcache->{HIGHESTMODSEQ} " -                           ."1:".($pcache->{UIDNEXT}-1)."))" -        if $self->_enabled('QRESYNC') and -           ($pcache->{HIGHESTMODSEQ} // 0) > 0 and ($pcache->{UIDNEXT} // 1) > 1; +    if ($self->_enabled('QRESYNC') and ($pcache->{HIGHESTMODSEQ} // 0) > 0 and ($pcache->{UIDNEXT} // 1) > 1) { +        $command .= " (QRESYNC ($pcache->{UIDVALIDITY} $pcache->{HIGHESTMODSEQ} " +                               ."1:".($pcache->{UIDNEXT}-1); +        $command .= " ($uids $seqs)" if defined $uids and defined $seqs; +        $command .= "))"; +    }      if ($self->{_STATE} eq 'SELECTED' and ($self->_capable('CONDSTORE') or $self->_capable('QRESYNC'))) {          # A mailbox is currently selected and the server advertises | 
