diff options
-rw-r--r-- | Changelog | 4 | ||||
-rw-r--r-- | benchmark/dovecot.conf | 55 | ||||
-rwxr-xr-x | benchmark/random_maildir.pl | 57 | ||||
-rwxr-xr-x | benchmark/run | 506 | ||||
-rw-r--r-- | doc/benchmark.md | 279 | ||||
-rw-r--r-- | doc/getting-started.md | 11 | ||||
-rw-r--r-- | doc/interimap.1.md | 6 | ||||
-rw-r--r-- | doc/pullimap.1.md | 4 | ||||
-rw-r--r-- | doc/template.html | 3 | ||||
-rwxr-xr-x | interimap | 5 | ||||
-rw-r--r-- | lib/Net/IMAP/InterIMAP.pm | 4 | ||||
-rwxr-xr-x | pullimap | 13 | ||||
-rw-r--r-- | tests/pullimap/t | 61 | ||||
-rw-r--r-- | tests/resume/t | 2 | ||||
-rwxr-xr-x | tests/run | 6 |
15 files changed, 986 insertions, 30 deletions
@@ -105,6 +105,10 @@ interimap (0.5) upstream; - interimap: for the reason explained above, limit number of messages to 128 per APPEND command (only on servers advertizing MULTIAPPEND, for other servers the number remains 1). + - interimap: gracefully ignore messages with a NIL RFC822 attribute. + - pullimap: treat messages with a NIL RFC822 attribute as empty. + - pullimap: fix mangling of data lines starting with a dot (when an RFC + 5322 line starts with a '.', double it). -- Guilhem Moulin <guilhem@fripost.org> Fri, 10 May 2019 00:58:14 +0200 diff --git a/benchmark/dovecot.conf b/benchmark/dovecot.conf new file mode 100644 index 0000000..55301d9 --- /dev/null +++ b/benchmark/dovecot.conf @@ -0,0 +1,55 @@ +log_path = /dev/shm/mail.log +mail_home = /dev/shm/vmail/%u +mail_location = mdbox:~/mail +ssl = no + +listen = 127.0.0.1 +namespace { + inbox = yes + separator = / +} + +# https://wiki.dovecot.org/HowTo/Rootless +base_dir = /dev/shm/dovecot/run +default_internal_user = nobody +default_internal_group = nogroup +default_login_user = nobody + +service anvil { + chroot = +} +service imap-login { + chroot = +} +service stats { + chroot = +} + +passdb { + args = scheme=PLAIN username_format=%u /dev/shm/dovecot/users + driver = passwd-file +} +userdb { + args = username_format=%u /dev/shm/dovecot/users + driver = passwd-file +} + +protocols = imap + +mail_plugins = zlib +protocol imap { + mail_plugins = imap_zlib +} + +service imap-login { + inet_listener imap { + # disable + port = 0 + } + inet_listener interimap { + port = 10143 + } + inet_listener offlineimap { + port = 10144 + } +} diff --git a/benchmark/random_maildir.pl b/benchmark/random_maildir.pl new file mode 100755 index 0000000..363eb41 --- /dev/null +++ b/benchmark/random_maildir.pl @@ -0,0 +1,57 @@ +#!/usr/bin/perl + +#---------------------------------------------------------------------- +# Generate a random maildir +# Copyright © 2019 Guilhem Moulin <guilhem@fripost.org> +# +# This program is free
software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +#---------------------------------------------------------------------- + +use warnings; +use strict; + +use Time::HiRes (); +use POSIX qw{strftime setlocale}; +use Crypt::URandom "urandom"; + +setlocale(POSIX::LC_TIME, "C"); + +foreach (qw/cur new tmp/) { + my $d = $ARGV[0] ."/". $_; + mkdir $d, 0700 or die "mkdir: $!" +} + +my $NEW = $ARGV[0] ."/new"; +for (my $i = 0; $i < $ARGV[1]; $i++) { + my $timestamp = Time::HiRes::gettimeofday(); + my $filename = sprintf("%10d.M%010d.localhost", $timestamp, $i); + open my $out, ">", $NEW ."/". $filename or die "open: $!"; + + my $message_id = sprintf "%.10f\@example.net", $timestamp; + my $sender = unpack("H*", urandom(8))."\@example.net"; + print $out + "From: <$sender>\r\n" + . "To: <recipient\@example.net> \r\n" + . "Subject: Hello world! \r\n" + . "Date: ". strftime("%a, %e %b %Y %H:%M:%S %z", localtime($timestamp)). "\r\n" + . "Message-ID: ". sprintf("<%.10f\@example.net>", $timestamp)."\r\n" + . 
"\r\n"; + + my $len = 1 + int(rand(4095)); + my $body = unpack("H*", urandom($len)); + for (my $i = 0; $i < 2*$len; $i += 70) { + print $out substr($body, $i, 70), "\r\n"; + } + close($out); +} diff --git a/benchmark/run b/benchmark/run new file mode 100755 index 0000000..4a83c68 --- /dev/null +++ b/benchmark/run @@ -0,0 +1,506 @@ +#!/bin/bash + +set -ue +PATH="/usr/sbin:/usr/bin:/sbin:/bin" +export PATH + +unset CHROOT NETNS +cleanup() { + if [ "${CHROOT+x}" ]; then + schroot --end-session --chroot="$CHROOT" + fi + if [ "${NETNS+x}" ]; then + ip netns del "$NETNS" + fi +} +trap cleanup EXIT INT TERM + +# create new CHROOT +DEB_BUILD_ARCH="$(dpkg-architecture -qDEB_BUILD_ARCH)" +CHROOT="$(schroot -c "unstable-$DEB_BUILD_ARCH-sbuild" -b)" +ROOTDIR="/run/schroot/mount/$CHROOT" + +# create new network namespace and place counters to measure network usage +ip netns add "${NETNS:="interimap-benchmark"}" +ip netns exec "$NETNS" nft -f- <<- EOF + flush ruleset + table inet filter { + counter interimap-in { } + counter interimap-out { } + counter offlineimap-in { } + counter offlineimap-out { } + chain input { + type filter hook input priority 0 + iif "lo" ip daddr 127.0.0.1 tcp dport 10143 counter name interimap-in + iif "lo" ip daddr 127.0.0.1 tcp dport 10144 counter name offlineimap-in + } + chain output { + type filter hook output priority 0 + oif "lo" ip saddr 127.0.0.1 tcp sport 10143 counter name interimap-out + oif "lo" ip saddr 127.0.0.1 tcp sport 10144 counter name offlineimap-out + } + } +EOF +ip netns exec "$NETNS" ip addr add "127.0.0.1" dev "lo" +ip netns exec "$NETNS" ip link set "lo" up + +# resize the partition so it can hold the mail stores (you may want to +# turn swap off too) +mount -o"remount,size=15G" "/run/schroot/mount/$CHROOT/dev/shm" + +# install dependencies +schroot --directory="/" --chroot="$CHROOT" -r -- \ + env DEBIAN_FRONTEND="noninteractive" apt-get install \ + --no-install-recommends --assume-yes \ + time dovecot-imapd offlineimap \ + 
libconfig-tiny-perl libdbd-sqlite3-perl libnet-ssleay-perl \ + libcrypt-urandom-perl procps + +# run a command in the chroot +jail() { + local user="" home="" + case "${u:-local}" in + local) user="nobody"; home="/dev/shm/nobody";; + remote) user="user"; home="/dev/shm/vmail/user";; + esac + ip netns exec "$NETNS" \ + schroot --directory="/dev/shm/nobody" --user="nobody" --chroot="$CHROOT" -r \ + -- env -i PATH="/usr/bin:/bin" USER="$user" HOME="$home" "$@" +} + +# run a command in the chroot in a monitored fashion +jail_stat() { + local e U S M P u="local" + local counters="$ROOTDIR/tmp/counters.$1.json" + ip netns exec "$NETNS" nft reset counter inet filter "$1-in" >/dev/null + ip netns exec "$NETNS" nft reset counter inet filter "$1-out" >/dev/null + + jail time --format="%e\\t%U\\t%S\\t%M\\t%P" --output="/tmp/time.$1" \ + -- "$@" >/dev/null || true + IFS=$'\t' read e U S M P <"$ROOTDIR/tmp/time.$1" + + local i="${NAME:-$1}" + local a="${i#* }" + [ "$a" = "$i" ] && a=" " || a=" $a" + + printf "%11s%s" "${i%% *}" "$a" + printf " %5.2fs %6.2fs" "$U" "$S" + if [ "${IDLE:-n}" = "n" ]; then + printf " %5.2fs %4s" "$e" "$P" + fi + printf " %8s" "${M}k" + + ip netns exec "$NETNS" nft -j list counters | jq ".nftables + | map(select(.counter) | .counter | { key: .name, value: {packets, bytes} }) + | from_entries" >"$counters" + + local ib ip ob op + ib="$(bytes "$(jq ".\"$1-in\".bytes" <"$counters")")" + ob="$(bytes "$(jq ".\"$1-out\".bytes" <"$counters")")" + ip="$(_units "$(jq ".\"$1-in\".packets" <"$counters")" 1000)" + op="$(_units "$(jq ".\"$1-out\".packets" <"$counters")" 1000)" + printf " %8s / %-7s" "$ob" "$ib" # inverse for the client's perspective + printf " %8s / %-7s" "$op" "$ip" + printf "\\n" +} + +# display metrics headers +headers() { + declare -a h=(" user" " system") + if [ "${IDLE:-n}" = "n" ]; then + h+=( " real" " CPU" ) + fi + h+=( " max RSS" " traffic (in/out) " " packets (in/out) " ) + local x="offlineimap -q" i + + printf "%s" "${x//?/ }" + for i 
in "${h[@]}"; do + printf " %s" "$i" + done + printf "\\n" + + printf "%s" "${x//?/-}" + for i in "${h[@]}"; do + printf " %s" "${i//?/-}" + done + printf "\\n" +} + +# install Dovecot's "system" configuration and start the server +install -onobody -gnogroup -m0700 --directory \ + "$ROOTDIR/dev/shm/dovecot" \ + "$ROOTDIR/dev/shm/vmail" \ + "$ROOTDIR/dev/shm/nobody" + +install -onobody -gnogroup -m0644 \ + "./benchmark/dovecot.conf" \ + "$ROOTDIR/dev/shm/dovecot/config" +jail /usr/sbin/dovecot -c"/dev/shm/dovecot/config" + +install -onobody -gnogroup -m0600 /dev/null \ + "$ROOTDIR/dev/shm/dovecot/users" +PASSWORD="$(xxd -l16 -p </dev/urandom)" +printf "%s:%s:::::\\n" "user" "$PASSWORD" \ + >"$ROOTDIR/dev/shm/dovecot/users" + +# install user configuration for Dovecot, interimap, and offlineimap +cat >"$ROOTDIR/dev/shm/nobody/.dovecot.conf" <<-EOF + log_path = /dev/null + mail_home = /dev/shm/nobody + mail_location = maildir:~/Maildir + ssl = no +EOF + +install -onobody -gnogroup -Dm0700 --directory \ + "$ROOTDIR/dev/shm/nobody/.config/interimap" \ + "$ROOTDIR/dev/shm/nobody/.local/share" + +cat >"$ROOTDIR/dev/shm/nobody/.config/interimap/config" <<-EOF + database = bench.db + + [local] + type = tunnel + command = doveadm -c/dev/shm/nobody/.dovecot.conf exec imap + null-stderr = YES + + [remote] + type = imap + host = 127.0.0.1 + port = 10143 + STARTTLS = no + username = user + password = $PASSWORD +EOF + +cat >"$ROOTDIR/dev/shm/nobody/.offlineimaprc" <<-EOF + [general] + accounts = bench + + [Account bench] + localrepository = local + remoterepository = remote + + [Repository local] + type = Maildir + localfolders = ~/Maildir2 + + [Repository remote] + type = IMAP + remotehost = 127.0.0.1 + remotepass = $PASSWORD + remoteport = 10144 + remoteuser = user + ssl = no + starttls = no + # keep the default (no) as it doesn't seem to work with large mailboxes, perhaps + # due to https://dovecot.org/pipermail/dovecot/2019-November/117522.html + #usecompression = yes +EOF + 
+# install interimap's development version +install -oroot -groot -m0755 -Dt "/$ROOTDIR/usr/bin" \ + ./interimap ./benchmark/random_maildir.pl +install -oroot -groot -Dm0644 \ + ./lib/Net/IMAP/InterIMAP.pm "$ROOTDIR/usr/share/perl5/Net/IMAP/InterIMAP.pm" + +# create a random mail store at mdbox:~/mail.back +prepare() { + local u="remote" d m n seqs + local maildir="/dev/shm/vmail/user/maildir" + + clear + jail rm -rf -- "$maildir" "/dev/shm/vmail/user/mail.back" + + for m in "${!MAILBOXES[@]}"; do + [ "${m^^[a-z]}" = "INBOX" ] && d="$maildir" || d="$maildir/.$m" + jail mkdir -p -- "$d" + # create 20% more; will be deleted afterwards (having only + # contiguous UIDs might bias the metrics) + n="${MAILBOXES["$m"]}" + jail random_maildir.pl "$d" $((n+n/5)) + done + + # convert to mdbox + jail doveadm -c"/dev/shm/dovecot/config" -omail_location="maildir:~/maildir" \ + sync "mdbox:~/mail.back" + jail rm -rf -- "$maildir" + + # expunge 20% and purge + for m in "${!MAILBOXES[@]}"; do + n="${MAILBOXES["$m"]}" + seqs="$(shuf -n $((n/5)) -i"1-$n")" + jail doveadm -c"/dev/shm/dovecot/config" -omail_location="mdbox:~/mail.back" \ + expunge mailbox "$m" "${seqs//$'\n'/,}" + done + jail doveadm -c"/dev/shm/dovecot/config" -omail_location="mdbox:~/mail.back" purge +} + +# populate a client from backup mailstore mdbox:~/mail.back (copied to +# avoid recreating / conversion) +populate() { + local m u="remote" cmd + clear + + if [ "${KEEP_BACKUP:-y}" = "n" ]; then + jail mv -T "/dev/shm/vmail/user/mail.back" "/dev/shm/vmail/user/mail" + else + jail cp -aT "/dev/shm/vmail/user/mail.back" "/dev/shm/vmail/user/mail" + fi + + # force dovecot to index and compute the state, otherwise the first + # thing to query might be disadvantaged + jail doveadm -c"/dev/shm/dovecot/config" index "INBOX" + jail doveadm -c"/dev/shm/dovecot/config" mailbox status "all" "*" >/dev/null + + u="local" + # initial configuration + for cmd in "$@"; do + case "$cmd" in + interimap) jail interimap --quiet
2>/dev/null;; + offlineimap) jail offlineimap -u quiet 2>/dev/null;; + *) exit 1;; + esac + done +} + +# remove interimap / offlineimap database and mail store (but keep +# mdbox:~/mail.back) +clear() { + jail rm -rf -- \ + "/dev/shm/vmail/user/mail" \ + "/dev/shm/nobody/.local/share/interimap/bench.db" \ + "/dev/shm/nobody/.offlineimap" \ + "/dev/shm/nobody/Maildir" \ + "/dev/shm/nobody/Maildir2" +} + +# pretty print a number in k/M/G/T etc +_units() { + local n=$(( $1 )) b="$2" s u="" + [ $b -eq 1024 ] && s="i" || s="" + while [ ${#n} -gt 4 ]; do + case "$u" in + "") u="k";; + k) u="M";; + M) u="G";; + G) u="T";; + *) break;; + esac + n=$((n/b)) + done + printf "%d%s" "$n" "${u:+$u$s}" +} +bytes() { printf "%sB" "$(_units "$1" 1024)"; } + +# generate and deliver a random message +sample_message() { + local date="$(date +"%s.%N")" + cat <<-EOF + From: <sender@example.net> + To: <recipient@example.net> + Date: $(date -R -d@"$date") + Message-ID: <$date@example.net> + + EOF + xxd -ps -c30 -l2048 /dev/urandom # 4165 bytes +} +deliver() { + local m="$1" u="remote" + jail doveadm -c"/dev/shm/dovecot/config" exec dovecot-lda -e -m "$m" +} + +# write down markdown title +title() { + local x="$1" h="$2" + printf "\\n%s\\n%s\\n" "$h" "${h//?/$x}" +} + +# run benchmark for `interimap` / `offlineimap -q` / `offlineimap`: +# populate, run optional actions (such as delivery), then sync again in +# a monitored fashion +run-all() { + local a cmd u q="" NAME="" + for cmd in "interimap" "offlineimap -q" "offlineimap"; do + populate "${cmd%% *}" + case "${cmd%% *}" in + interimap) q="--quiet";; + offlineimap) q="-u quiet";; + *) exit 1; + esac + for a in "$@"; do "$a"; done + NAME="$cmd" jail_stat $cmd $q 2>/dev/null + done +} + + +echo; echo +title "=" "Single mailbox" + +cat <<-EOF + + We create a mailbox on the remote server, populate it with a number of + messages, and synchronize it locally. 
We then collect metrics for no-op + synchronization (i.e., of mailboxes that are already in sync), and + reconciliation after receiving a *single* message on the remote server. +EOF + +# generate a message to be used in *all* "Single mailbox" tests +sample_message >"$ROOTDIR/tmp/msg1" +activity1() { + deliver "inbox" <"$ROOTDIR/tmp/msg1" +} + + +declare -A MAILBOXES +for n in 100 1000 10000 100000; do + title "-" "$n messages" + MAILBOXES=( ["inbox"]="$n" ) + prepare + + printf "\\n### %s ###\\n\\n" "No-op (in sync)" + headers + run-all + + printf "\\n### %s ###\\n\\n" "Reconciliation" + headers + run-all activity1 +done + + +m=75 +echo; echo +title "=" "$m mailboxes" + +cat <<-EOF + + We create $m mailboxes on the remote server, populate them with an equal + number of messages, and synchronize them locally. We then collect + metrics for no-op synchronization (i.e., of mailboxes that are already + in sync), and reconciliation after the following changes are being + applied to the remote server: + + - 3 *new* messages (two on mailbox #2, one on mailbox #3); and + - 5 existing messages *EXPUNGEd* (two on mailboxes #3 and #4, one on + mailbox #5). 
+EOF + +# generate more messages to be used in *all* "$m mailboxes" tests +sample_message >"$ROOTDIR/tmp/msg2" +sample_message >"$ROOTDIR/tmp/msg3" + +activity2() { + local u="remote" + deliver "mailbox2" <"$ROOTDIR/tmp/msg1" + deliver "mailbox2" <"$ROOTDIR/tmp/msg2" + deliver "mailbox3" <"$ROOTDIR/tmp/msg3" + # intentionally modify the remote only because not all local backend speak IMAP + jail doveadm -c"/dev/shm/dovecot/config" expunge mailbox "mailbox3" "1:2" + jail doveadm -c"/dev/shm/dovecot/config" expunge mailbox "mailbox4" "1,3" + jail doveadm -c"/dev/shm/dovecot/config" expunge mailbox "mailbox5" "*" +} + +for n in 100 1000 10000; do + title "-" "$n messages per mailbox" + + MAILBOXES=( ["inbox"]="$n" ) + for ((i=2; i<=$m; i++)); do + MAILBOXES["mailbox$i"]="$n" + done + prepare + + printf "\\n### %s ###\\n\\n" "No-op (in sync)" + headers + run-all + + printf "\\n### %s ###\\n\\n" "Reconciliation" + headers + run-all activity2 +done + + +title "=" "Live synchronization" +timeout=$((6 * 3600)) +step=5 + +MAILBOXES=( ["inbox"]=100000 ["xlarge"]=100000 ) +for ((i=0; i<10; i++)); do + MAILBOXES["large$i"]=10000 +done +for ((i=0; i<20; i++)); do + MAILBOXES["medium$i"]=5000 +done +for ((i=0; i<45; i++)); do + MAILBOXES["small$i"]=2000 +done +for ((i=0; i<20; i++)); do + MAILBOXES["xsmall$i"]=500 +done + +n=0 +for i in "${MAILBOXES[@]}"; do + n=$(( n + i )) +done + +cat <<-EOF + + ${#MAILBOXES[@]} mailboxes, $n messages in total: + + - 2 with 100000 messages; + - 10 with 10000 messages; + - 20 with 5000 messages; + - 45 with 2000 messages; and + - 20 with 500 messages. 
+ + The two local mail stores (respectively for [InterIMAP] and + [OfflineIMAP]) are initially in sync with the remote server, and we keep + long-running “autorefresh” synchronization processes alive for 6h, with + updates being regularly applied to the remote server: every $step seconds, + + - a new message is delivered to a random mailbox with 5% probability + (once every $((20*step))s on average); + - a random message is EXPUNGEd with 5% probability (once every $((20*step))s on + average); and + - a random message is marked as seen with 10% probability (once every + $((10*step))s on average). + + \`interimap\` is configured to sync every *30s*. \`offlineimap\` is + configured to quick sync very *30s*, with a regular sync every *1h*. + +EOF + +IDLE="y" headers +prepare +KEEP_BACKUP="n" populate "interimap" "offlineimap" + +IDLE="y" jail_stat interimap --quiet --watch=30 2>/dev/null & +IDLE="y" jail_stat offlineimap -u quiet -k "Account_bench:autorefresh=0.5" \ + -k "Account_bench:quick=120" 2>/dev/null & + +u="remote" +timeout=$(( $(date +%s) + timeout )) +while [ $(date +%s) -lt $timeout ]; do + n="$(shuf -n1 -i1-100)" + if [ $n -le 5 ]; then + # deliver to a random mailbox on the remote + m="$(shuf -n1 -e -- "${!MAILBOXES[@]}")" + sample_message | deliver "$m" + fi + n="$(shuf -n1 -i1-100)" + if [ $n -le 5 ]; then + # expunge a random message on the remote + read guid uid < <(jail doveadm -c"/dev/shm/dovecot/config" search all | shuf -n1) + jail doveadm -c"/dev/shm/dovecot/config" expunge mailbox-guid "$guid" uid "$uid" + fi + n="$(shuf -n1 -i1-100)" + if [ $n -le 10 ]; then + # mark a random message as seen + read guid uid < <(jail doveadm -c"/dev/shm/dovecot/config" search all | shuf -n1) + jail doveadm -c"/dev/shm/dovecot/config" flags add "\\Seen" mailbox-guid "$guid" uid "$uid" + fi + sleep $step +done + +jail pkill -TERM -u"nobody" -s0 interimap +sleep 0.2 # give a chance to print the stats +jail pkill -SIGABRT -u"nobody" -s0 offlineimap +wait diff --git 
a/doc/benchmark.md b/doc/benchmark.md new file mode 100644 index 0000000..72f51a4 --- /dev/null +++ b/doc/benchmark.md @@ -0,0 +1,279 @@ +% InterIMAP benchmark metrics and comparison +% [Guilhem Moulin](mailto:guilhem@fripost.org) + +The [IMAP `QRESYNC` extension][RFC 7162] allows efficient mailbox +synchronization, in terms of I/O as well as CPU usage. In this document +we give some benchmark metrics to compare [InterIMAP]'s network usage with +so-called full synchronization solutions such as [OfflineIMAP]. The +timings are to be taken with a grain of salt, though: they likely won't +reflect real-world situations as the emails are stored in RAM for this +benchmark, and all network access is on the loopback interface. (Moreover +neither SSL/TLS nor STARTTLS are being used in the below. They would add +another 2-3 round-trips per connection.) + +These metrics show how [InterIMAP] scales linearly with the number of +*mailboxes* — pretty much regardless of how many messages they contain (at +least as long as the server can cope with large mailboxes) — while +[OfflineIMAP] scales with the number of *messages* on active mailboxes. + +While [InterIMAP] performs significantly better (especially given that it +can be relied upon to synchronize flag changes, unlike [OfflineIMAP]'s +“quick” mode), it should be noted that efficiency comes at the expense of +flexibility. In particular it's not possible to exclude old messages from +synchronization (mailboxes can be excluded but finer granularity is not +possible). And of course not all IMAP servers support [`QRESYNC`][RFC 7162] +and other extensions [InterIMAP] requires. Furthermore [InterIMAP] is +single threaded and doesn't use pipelining at the moment. (Concurrency +opens a can of worms, and given the below metrics it simply doesn't seem +worth the trouble ☺) + +----------------------------------------------------------------------- + +The script used to compute these metrics can be found [there][benchmark-script]. 
+We use [Dovecot] as IMAP server; the “remote” mailbox store is in +[multi-dbox][dbox] format (initially populated with random messages of average +size ~4kiB, and randomly pruned to avoid having only contiguous UIDs) while +[maildir] is used “locally”. The configuration files were not tuned for +performance (however [InterIMAP] takes advantage of Dovecot's support of the +[IMAP `COMPRESS` extension][RFC 4978] as it is its default behavior). + +The *user* (resp. *system*) column denotes the number of CPU-seconds +used by the process in user (resp. kernel) mode. The *real* column is +the elapsed real (wall clock) time. Network measurements are obtained +by placing packet counters on the interface. + +[RFC 4978]: https://tools.ietf.org/html/rfc4978 +[RFC 7162]: https://tools.ietf.org/html/rfc7162 +[InterIMAP]: interimap.1.html +[OfflineIMAP]: https://www.offlineimap.org/ +[benchmark-script]: https://git.guilhem.org/interimap/plain/benchmark/run +[Dovecot]: https://dovecot.org +[dbox]: https://wiki.dovecot.org/MailboxFormat/dbox +[maildir]: https://wiki.dovecot.org/MailboxFormat/Maildir + +----------------------------------------------------------------------- + +Single mailbox {#single-mailbox} +============== + +We create a mailbox on the remote server, populate it with a number of +messages, and synchronize it locally. We then collect metrics for no-op +synchronization (i.e., of mailboxes that are already in sync), and +reconciliation after receiving a *single* message on the remote server. + +[OfflineIMAP]'s network usage remains low in “quick” mode for large +mailboxes that are already in sync, but as soon as a mail arrives the +performance degrades by *several orders of magnitude*. On the other +hand [InterIMAP] has very little overhead on large mailboxes (also +memory-wise), and when a message is delivered there is barely more +traffic than what's required for the transfer of said message. 
+ +100 messages +------------ + +### No-op (in sync) ### + + user system real CPU max RSS traffic (in/out) packets (in/out) +-------------- ------ ------- ------ ---- -------- ------------------ ------------------ + interimap 0.05s 0.01s 0.07s 85% 21368k 1439B / 1017B 13 / 15 +offlineimap -q 0.04s 0.01s 0.27s 23% 19748k 2497B / 1236B 16 / 20 +offlineimap 0.05s 0.01s 0.32s 22% 19268k 10kiB / 1456B 21 / 23 + +### Reconciliation ### + + user system real CPU max RSS traffic (in/out) packets (in/out) +-------------- ------ ------- ------ ---- -------- ------------------ ------------------ + interimap 0.06s 0.00s 0.08s 83% 21116k 4516B / 1412B 17 / 19 +offlineimap -q 0.06s 0.00s 0.32s 22% 19968k 15kiB / 1670B 23 / 26 +offlineimap 0.06s 0.00s 0.32s 22% 18616k 14kiB / 1284B 25 / 19 + +1000 messages +------------- + +### No-op (in sync) ### + + user system real CPU max RSS traffic (in/out) packets (in/out) +-------------- ------ ------- ------ ---- -------- ------------------ ------------------ + interimap 0.05s 0.01s 0.07s 84% 21204k 1449B / 965B 13 / 14 +offlineimap -q 0.06s 0.01s 0.33s 24% 19068k 2664B / 1236B 19 / 20 +offlineimap 0.09s 0.02s 0.37s 30% 19868k 75kiB / 1508B 26 / 24 + +### Reconciliation ### + + user system real CPU max RSS traffic (in/out) packets (in/out) +-------------- ------ ------- ------ ---- -------- ------------------ ------------------ + interimap 0.06s 0.00s 0.08s 78% 21212k 4524B / 1333B 17 / 16 +offlineimap -q 0.08s 0.03s 0.33s 37% 22284k 80kiB / 1775B 29 / 28 +offlineimap 0.10s 0.01s 0.32s 36% 20116k 80kiB / 1597B 24 / 25 + +10000 messages +-------------- + +### No-op (in sync) ### + + user system real CPU max RSS traffic (in/out) packets (in/out) +-------------- ------ ------- ------ ---- -------- ------------------ ------------------ + interimap 0.06s 0.00s 0.09s 75% 20980k 1449B / 965B 13 / 14 +offlineimap -q 0.10s 0.03s 0.37s 37% 36708k 2719B / 1184B 20 / 19 +offlineimap 0.50s 0.09s 0.78s 75% 45424k 746kiB / 2080B 37 / 35 + +### 
Reconciliation ### + + user system real CPU max RSS traffic (in/out) packets (in/out) +-------------- ------ ------- ------ ---- -------- ------------------ ------------------ + interimap 0.06s 0.00s 0.12s 54% 21136k 4530B / 1205B 17 / 16 +offlineimap -q 0.51s 0.08s 0.76s 77% 42860k 751kiB / 2608B 43 / 44 +offlineimap 0.62s 0.16s 0.88s 89% 47996k 750kiB / 2222B 38 / 37 + +100000 messages +--------------- + +### No-op (in sync) ### + + user system real CPU max RSS traffic (in/out) packets (in/out) +-------------- ------ ------- ------ ---- -------- ------------------ ------------------ + interimap 0.06s 0.00s 0.16s 38% 21080k 1441B / 1017B 13 / 15 +offlineimap -q 1.06s 0.10s 1.40s 83% 201376k 2722B / 1236B 20 / 20 +offlineimap 4.88s 0.83s 5.23s 109% 280716k 7626kiB / 5564B 138 / 102 + +### Reconciliation ### + + user system real CPU max RSS traffic (in/out) packets (in/out) +-------------- ------ ------- ------ ---- -------- ------------------ ------------------ + interimap 0.06s 0.00s 0.48s 15% 22876k 4532B / 1362B 17 / 19 +offlineimap -q 5.09s 0.75s 5.38s 108% 277336k 7637kiB / 9941B 261 / 185 +offlineimap 4.92s 0.76s 5.22s 108% 279592k 7631kiB / 5603B 144 / 102 + +----------------------------------------------------------------------- + +75 mailboxes {#multi-mailbox} +============ + +We create 75 mailboxes on the remote server, populate them with an equal +number of messages, and synchronize them locally. We then collect +metrics for no-op synchronization (i.e., of mailboxes that are already +in sync), and reconciliation after the following changes are being +applied to the remote server: + + - 3 *new* messages (two on mailbox #2, one on mailbox #3); and + - 5 existing messages *EXPUNGEd* (two on mailboxes #3 and #4, one on + mailbox #5). + +The results are not surprising given the metrics from the [above +section](#single-mailbox). 
In “quick” mode [OfflineIMAP] still performs +reasonably well when the mailboxes are in sync (even though it iterates +through each mailbox and the extra roundtrips increase network traffic +compared to the single mailbox case), but performance decreases +significantly when a message is delivered to a large mailbox. Once +again [InterIMAP] has very little network overhead regardless of mailbox +size; it does take longer on very large mailboxes, but the bottleneck is +the IMAP server ([InterIMAP] is just twiddling its thumbs waiting for Dovecot +to compute `STATUS` responses). + +100 messages per mailbox +------------------------ + +### No-op (in sync) ### + + user system real CPU max RSS traffic (in/out) packets (in/out) +-------------- ------ ------- ------ ---- -------- ------------------ ------------------ + interimap 0.06s 0.00s 0.12s 55% 21712k 1949B / 898B 11 / 13 +offlineimap -q 0.32s 0.08s 0.43s 92% 22400k 36kiB / 7260B 93 / 99 +offlineimap 0.97s 0.32s 1.32s 98% 22648k 606kiB / 19kiB 243 / 251 + +### Reconciliation ### + + user system real CPU max RSS traffic (in/out) packets (in/out) +-------------- ------ ------- ------ ---- -------- ------------------ ------------------ + interimap 0.07s 0.00s 0.15s 53% 21860k 10kiB / 1634B 19 / 19 +offlineimap -q 0.34s 0.11s 0.59s 77% 21248k 81kiB / 8697B 109 / 117 +offlineimap 0.93s 0.35s 1.30s 98% 22804k 620kiB / 20kiB 252 / 253 + +1000 messages per mailbox +------------------------- + +### No-op (in sync) ### + + user system real CPU max RSS traffic (in/out) packets (in/out) +-------------- ------ ------- ------ ---- -------- ------------------ ------------------ + interimap 0.05s 0.01s 0.31s 22% 22028k 1944B / 898B 11 / 13 +offlineimap -q 0.97s 0.22s 1.22s 97% 23920k 36kiB / 7000B 90 / 94 +offlineimap 4.87s 1.54s 5.01s 127% 25040k 5507kiB / 26kiB 393 / 388 + +### Reconciliation ### + + user system real CPU max RSS traffic (in/out) packets (in/out) +-------------- ------ ------- ------ ---- -------- ------------------
------------------ + interimap 0.08s 0.00s 0.29s 28% 22132k 10kiB / 1931B 20 / 19 +offlineimap -q 1.25s 0.32s 1.45s 108% 27276k 344kiB / 9038B 119 / 123 +offlineimap 4.72s 1.70s 5.05s 127% 26464k 5521kiB / 27kiB 399 / 392 + +10000 messages per mailbox +-------------------------- + +### No-op (in sync) ### + + user system real CPU max RSS traffic (in/out) packets (in/out) +-------------- ------ ------- ------ ---- -------- ------------------ ------------------ + interimap 0.07s 0.00s 1.57s 4% 21896k 1942B / 898B 11 / 13 +offlineimap -q 12.10s 3.98s 11.67s 137% 58624k 37kiB / 10kiB 94 / 168 +offlineimap 55.49s 23.68s 51.50s 153% 70652k 54MiB / 57kiB 1072 / 996 + +### Reconciliation ### + + user system real CPU max RSS traffic (in/out) packets (in/out) +-------------- ------ ------- ------ ---- -------- ------------------ ------------------ + interimap 0.08s 0.00s 1.73s 5% 23108k 10kiB / 1624B 20 / 23 +offlineimap -q 14.60s 5.22s 14.00s 141% 64988k 3028kiB / 15kiB 203 / 263 +offlineimap 57.24s 25.92s 53.72s 154% 76560k 54MiB / 89kiB 1981 / 1625 + +----------------------------------------------------------------------- + +Live synchronization {#live-sync} +==================== + +97 mailboxes, 500000 messages in total: + + - 2 with 100000 messages; + - 10 with 10000 messages; + - 20 with 5000 messages; + - 45 with 2000 messages; and + - 20 with 500 messages. + +The two local mail stores (respectively for [InterIMAP] and +[OfflineIMAP]) are initially in sync with the remote server, and we keep +long-running “autorefresh” synchronization processes alive for 6h, with +updates being regularly applied to the remote server: every 5 seconds, + + - a new message is delivered to a random mailbox with 5% probability + (once every 100s on average); + - a random message is EXPUNGEd with 5% probability (once every 100s on + average); and + - a random message is marked as seen with 10% probability (once every + 50s on average). + +`interimap` is configured to sync every *30s*. 
`offlineimap` is +configured to quick sync every *30s*, with a regular sync every *1h*. + + user system max RSS traffic (in/out) packets (in/out) +----------- -------- -------- -------- ------------------ ------------------ + interimap 12.95s 0.26s 24276k 743kiB / 257kiB 2207 / 4143 +offlineimap 5327.79s 1495.78s 394044k 942MiB / 7840kiB 87k / 126k + +Long-lived synchronization for large and busy mail stores is where +[InterIMAP] truly shines, in terms of CPU as well as network usage. +(The amount of CPU time spent in kernel mode is so low because the +process spends most of its time sleeping or in blocking calls waiting +for the server to compute `STATUS` responses. Smart servers like +Dovecot should cache states though, hence are able to serve these +responses quickly.) Thanks to the [`QRESYNC`][RFC 7162]-based +synchronization there is no need for complex client-side computation, +nor for sending vast amounts of data over the network. (To be fair, +while the amount of CPU time spent in user mode remains low, the local +IMAP server might do a bit of extra work which is not counted here. But +here again caching helps avoid expensive directory traversal.) The +performance gain is most appreciated for battery-powered devices, as +well as devices behind slow and/or high-latency network connections ☺. +Moreover [InterIMAP] *does* synchronize flag updates at every step, while +[OfflineIMAP] normally skips these in “quick” mode so might *delay* flag +updates for up to one hour. diff --git a/doc/getting-started.md b/doc/getting-started.md index 371449d..e20b71d 100644 --- a/doc/getting-started.md +++ b/doc/getting-started.md @@ -26,8 +26,8 @@ not*. Instead, InterIMAP needs an [IMAP4rev1] server on *both* peers to synchronize.
This may sound like a severe limitation at first, but by seeing both local and remote mail storage through the same “IMAP lens”, InterIMAP is able to take advantage of the abstraction layer and -perform significant optimizations, yielding much faster synchronization. -(*TODO* link to benchmark.) +perform significant optimizations, yielding [much faster](benchmark.html) +synchronization. *Note*: InterIMAP uses the [Quick Mailbox Resynchronization][RFC 7162] extension for stateful synchronization, hence won't work on IMAP servers that don't advertise support for that extension. @@ -226,8 +226,9 @@ update is requested every minute. Thanks to the [`QRESYNC`][RFC 7162] IMAP extension a status update scales linearly with the number of mailboxes (unlike [OfflineIMAP] *not* with the number of messages). And thanks to the `COMPRESS` extension, the typical volume of data exchanged -is rather small (*TODO* metrics). You may even want to override the -default settings and reduce the interval between status updates to 20s: +[is rather small](benchmark.html#live-sync). You may even want to +override the default settings and reduce the interval between status +updates to 20s: $ mkdir -p ${XDG_CONFIG_HOME:-~/.config}/systemd/user/interimap.service.d <!-- --> @@ -266,7 +267,7 @@ Other use-cases: Benchmarks: -: *TODO* +: [Benchmark metrics and comparison](benchmark.html) Manual diff --git a/doc/interimap.1.md b/doc/interimap.1.md index ee92668..5cc06a8 100644 --- a/doc/interimap.1.md +++ b/doc/interimap.1.md @@ -192,7 +192,7 @@ Options : Show the version number and exit. -Configuration file +Configuration file {#configuration-file} ================== Unless told otherwise by the `--config=FILE` command-line option, @@ -429,7 +429,7 @@ Valid options are: : File containing trusted certificates to use during server certificate authentication if `SSL_verify=YES`.
-Supported extensions +Supported extensions {#supported-extensions} ==================== `interimap` takes advantage of servers supporting the following @@ -439,7 +439,7 @@ extensions to the [IMAP4rev1 protocol][RFC 3501] (those marked as * `LITERAL+` ([RFC 2088], recommended); * `MULTIAPPEND` ([RFC 3502], recommended); * `COMPRESS=DEFLATE` ([RFC 4978], recommended); - * `NOTIFY` ([RFC 5465], recommended); + * `NOTIFY` ([RFC 5465]); * `SASL-IR` ([RFC 4959]); and * `UNSELECT` ([RFC 3691]). diff --git a/doc/pullimap.1.md b/doc/pullimap.1.md index d40ece8..5028a14 100644 --- a/doc/pullimap.1.md +++ b/doc/pullimap.1.md @@ -71,7 +71,7 @@ Options : Show the version number and exit. -Configuration file +Configuration file {#configuration-file} ================== Unless told otherwise by the `--config=FILE` command-line option, @@ -249,7 +249,7 @@ Valid options are: : File containing trusted certificates to use during server certificate authentication if `SSL_verify=YES`. -Control flow +Control flow {#control-flow} ============ `pullimap` opens the *statefile* corresponding to a given configuration diff --git a/doc/template.html b/doc/template.html index dbcc0e6..41bf3d7 100644 --- a/doc/template.html +++ b/doc/template.html @@ -20,6 +20,9 @@ $endif$ span.underline{text-decoration: underline;} div.column{display: inline-block; vertical-align: top; width: 50%;} pre{tab-size: 4; -moz-tab-size: 4;} + table{width: 100%; margin-bottom: 3ex;} + table > thead > tr.header > th{border-bottom: 2px solid #ddd; padding: 8px;} + table > tbody > tr > td{border-bottom: 1px solid #ddd; padding: 6px;} @media only screen and (min-width: 600px) { .parent { float: right; @@ -1154,8 +1154,11 @@ sub callback_new_message($$$$;$$$) { my ($idx, $mailbox, $name, $mail, $UIDs, $buff, $bufflen) = @_; return unless exists $mail->{RFC822}; # not for us - my $length = length ${$mail->{RFC822}}; + my $length = length(${$mail->{RFC822}} // ""); if ($length == 0) { + # the RFC822 attribute can be NIL or 
empty (it's an nstring), however + # NIL can't be used in APPEND commands, and RFC 3502 sec. 6.3.11 + # explicitly forbids zero-length messages, so we ignore these here msg2($name => $mailbox, "WARNING: Ignoring new 0-length message (UID $mail->{UID})"); return; } diff --git a/lib/Net/IMAP/InterIMAP.pm b/lib/Net/IMAP/InterIMAP.pm index ef1c20b..1bff06e 100644 --- a/lib/Net/IMAP/InterIMAP.pm +++ b/lib/Net/IMAP/InterIMAP.pm @@ -1111,7 +1111,7 @@ sub set_cache($$%) { if ($k eq 'UIDVALIDITY') { # try to detect UIDVALIDITY changes early (before starting the sync) $self->fail("UIDVALIDITY changed! ($cache->{UIDVALIDITY} != $v) ". - "Need to invalidate the UID cache.") + "Need to invalidate the UID cache for $mailbox.") if defined $cache->{UIDVALIDITY} and $cache->{UIDVALIDITY} != $v; } $cache->{$k} = $v; @@ -1819,7 +1819,7 @@ sub _update_cache_for($$%) { if ($k eq 'UIDVALIDITY') { # try to detect UIDVALIDITY changes early (before starting the sync) $self->fail("UIDVALIDITY changed! ($cache->{UIDVALIDITY} != $v) ". - "Need to invalidate the UID cache.") + "Need to invalidate the UID cache for $mailbox.") if defined $cache->{UIDVALIDITY} and $cache->{UIDVALIDITY} != $v; $self->{_PCACHE}->{$mailbox}->{UIDVALIDITY} //= $v; } @@ -166,7 +166,7 @@ sub sendmail($$) { ); print STDERR "C: [...]\n" if $CONFIG{debug}; - if ($$rfc822 eq '') { + if (!defined $$rfc822 or $$rfc822 eq "") { # RFC 5321 section 4.1.1.4: if there was no mail data, the first # "\r\n" ends the DATA command itself $SMTP->printflush("\r\n.\r\n") or die; @@ -175,17 +175,18 @@ sub sendmail($$) { my $length = length($$rfc822); while ((my $end = index($$rfc822, "\r\n", $offset) + 2) != 1) { my $line = substr($$rfc822, $offset, $end-$offset); - # RFC 5321 section 4.5.2: the character sequence "\r\n.\r\n" - # ends the mail text and cannot be sent by the user - $SMTP->print($line eq ".\r\n" ? "..\r\n" : $line) or die; + # RFC 5321 sec. 
4.5.2: if the line starts with a dot, double it + $line = ".".$line if substr($line, 0, 1) eq "."; + $SMTP->print($line) or die; $offset = $end; } if ($offset < $length) { # the last line did not end with "\r\n"; add it in order to # have the receiving SMTP server recognize the "end of data" - # condition. See RFC 5321 section 4.1.1.4 + # condition. See RFC 5321 sec. 4.1.1.4 my $line = substr($$rfc822, $offset); - $SMTP->print(($line eq "." ? ".." : $line), "\r\n") or die; + $line = ".".$line if substr($line, 0, 1) eq "."; + $SMTP->print($line, "\r\n") or die; } $SMTP->printflush(".\r\n") or die; } diff --git a/tests/pullimap/t b/tests/pullimap/t index 7ae0c5f..79da3e0 100644 --- a/tests/pullimap/t +++ b/tests/pullimap/t @@ -6,12 +6,9 @@ step_start "\`pullimap --idle\` refuses to create the state file" ! pullimap --idle "remote" || error step_done -# make sure remote UIDs are 11-bytes long -doveadm -u "remote" mailbox update --min-next-uid 1000000000 "$MAILBOX" - # compare mailboxes; can't compare the RFC 3501 TEXT as LMTP adds a # Received: header. 
-# TODO unset lmtp_add_received_header once avaisable in Sid: +# TODO unset lmtp_add_received_header once available in sid: # https://doc.dovecot.org/settings/dovecot_core_settings/#lmtp-add-received-header list_mails_sha256() { local u="$1" guid uid @@ -26,7 +23,59 @@ check() { <( list_mails_sha256 "remote" ) \ || error "mailboxes differ" } +message_from() { + local date="$(date +"%s.%N")" sender="$1" + cat <<-EOF + From: $sender + To: <me@example.net> + Date: $(date -R -d@"$date") + Message-ID: <$date@example.net> + + EOF + xxd -ps -l8 /dev/urandom +} +step_start "Quote envelope sender address" +declare -a senders=("sender" "first.last" "foo-bar" \"\" "\"x\\\" #&\\\\y\"") +for s in "${senders[@]}"; do + message_from "$s@example.net" | deliver -u "remote" -- -m "$MAILBOX" +done +pullimap "remote" || error +check +for s in "${senders[@]}"; do + grep -F " from <$s@example.net> " <"$STDERR" || error "$s" +done +step_done + +step_start "Mail without data" +deliver -u "remote" -- -m "$MAILBOX" </dev/null +pullimap "remote" || error +check +step_done + +step_start "Dot-leading lines" +deliver -u "remote" -- -m "$MAILBOX" <<-EOF + From: alice@example.net + To: bob@example.net + Date: $(date -R) + Message-ID: <$(xxd -ps -l8 /dev/urandom)@example.net> + + foo + . + .bar + ..baz +EOF +# we can't add a test for message data not ending with CRLF, because the +# LMTP/SMTP client needs to add a CRLF so local and remote message +# bodies would differ. that said, while such a message could be added +# by IMAP and LDA, it's not valid for SMTP (RFC 5321 sec. 
4.1.1.4) +pullimap "remote" || error +check +step_done + + +# make sure remote UIDs are 11-bytes long +doveadm -u "remote" mailbox update --min-next-uid 1000000000 "$MAILBOX" # Add some messages and sync step_start "Fetching messages" @@ -40,11 +89,11 @@ check # same thing, but with some missing messages for ((i = 0; i < N; i+=2)); do sample_message | deliver -u "remote" -- -m "$MAILBOX" - deliver -u "remote" -- -m "$MAILBOX" </dev/null # even seqnum + deliver -u "remote" -- -m "$MAILBOX" </dev/null # odd seqnum done for ((i = 0; i < N; i+=2)); do # expunge every other message - doveadm -u "remote" expunge mailbox "$MAILBOX" $((N-i+32)) + doveadm -u "remote" expunge mailbox "$MAILBOX" $((N-i+32+7)) sample_message | deliver -u "remote" -- -m "$MAILBOX" done diff --git a/tests/resume/t b/tests/resume/t index cb0208c..a281ef3 100644 --- a/tests/resume/t +++ b/tests/resume/t @@ -38,7 +38,7 @@ doveadm -u "remote" mailbox status "all" "foo" >"$TMPDIR/foo.remote" ! interimap || error grep -Fx "Resuming interrupted sync for foo" <"$STDERR" -grep -Fx "local(foo): ERROR: UIDVALIDITY changed! ($uidvalidity2 != $uidvalidity) Need to invalidate the UID cache." <"$STDERR" +grep -Fx "local(foo): ERROR: UIDVALIDITY changed! ($uidvalidity2 != $uidvalidity) Need to invalidate the UID cache for foo." <"$STDERR" sqlite3 "$XDG_DATA_HOME/interimap/remote.db" >"$TMPDIR/dump2.sql" <<-EOF .dump @@ -235,11 +235,9 @@ sqlite3() { # Sample (random) message sample_message() { local date="$(date +"%s.%N")" - # also try non-conventional addresses for pullimap - local sender="$(shuf -n1 -e "sender" "first.last" "foo-bar" \"\" "\"x\\\" #&\\\\y\"" )" cat <<-EOF - From: <$sender@example.net> - To: <recipient@example.net> + From: <$(xxd -ps -l6 /dev/urandom)@example.net> + To: <me@example.net> Date: $(date -R -d@"$date") Message-ID: <$date@example.net> |