diff options
| -rw-r--r-- | benchmark/dovecot.conf | 55 | ||||
| -rwxr-xr-x | benchmark/random_maildir.pl | 57 | ||||
| -rwxr-xr-x | benchmark/run | 506 | ||||
| -rw-r--r-- | doc/benchmark.md | 279 | ||||
| -rw-r--r-- | doc/getting-started.md | 11 | ||||
| -rw-r--r-- | doc/template.html | 3 | 
6 files changed, 906 insertions, 5 deletions
| diff --git a/benchmark/dovecot.conf b/benchmark/dovecot.conf new file mode 100644 index 0000000..55301d9 --- /dev/null +++ b/benchmark/dovecot.conf @@ -0,0 +1,55 @@ +log_path = /dev/shm/mail.log +mail_home = /dev/shm/vmail/%u +mail_location = mdbox:~/mail +ssl = no + +listen = 127.0.0.1 +namespace { +    inbox = yes +    separator = / +} + +# https://wiki.dovecot.org/HowTo/Rootless +base_dir = /dev/shm/dovecot/run +default_internal_user  = nobody +default_internal_group = nogroup +default_login_user     = nobody + +service anvil { +    chroot = +} +service imap-login { +    chroot = +} +service stats { +    chroot = +} + +passdb { +    args = scheme=PLAIN username_format=%u /dev/shm/dovecot/users +    driver = passwd-file +} +userdb { +    args = username_format=%u /dev/shm/dovecot/users +    driver = passwd-file +} + +protocols = imap + +mail_plugins = zlib +protocol imap { +    mail_plugins = imap_zlib +} + +service imap-login { +    inet_listener imap { +        # disable +        port = 0 +    } +    inet_listener interimap { +        port = 10143 +    } +    inet_listener offlineimap { +        port = 10144 +    } +} diff --git a/benchmark/random_maildir.pl b/benchmark/random_maildir.pl new file mode 100755 index 0000000..363eb41 --- /dev/null +++ b/benchmark/random_maildir.pl @@ -0,0 +1,57 @@ +#!/usr/bin/perl + +#---------------------------------------------------------------------- +# Generate a random mbox +# Copyright © 2019 Guilhem Moulin <guilhem@fripost.org> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program.  If not, see <http://www.gnu.org/licenses/>. +#---------------------------------------------------------------------- + +use warnings; +use strict; + +use Time::HiRes (); +use POSIX qw{strftime setlocale}; +use Crypt::URandom "urandom"; + +setlocale(POSIX::LC_TIME, "C"); + +foreach (qw/cur new tmp/) { +    my $d = $ARGV[0] ."/". $_; +    mkdir $d, 0700 or die "mkdir: $!" +} + +my $NEW = $ARGV[0] ."/new"; +for (my $i = 0; $i < $ARGV[1]; $i++) { +    my $timestamp = Time::HiRes::gettimeofday(); +    my $filename = sprintf("%10d.M%010d.localhost", $timestamp, $i); +    open my $out, ">", $NEW ."/". $filename or die "open: $!"; + +    my $message_id = sprintf "%.10f\@example.net", $timestamp; +    my $sender = unpack("H*", urandom(8))."\@example.net"; +    print $out +          "From: <$sender>\r\n" +        . "To: <recipient\@example.net> \r\n" +        . "Subject: Hello world! \r\n" +        . "Date: ". strftime("%a, %e %b %Y %H:%M:%S %z", localtime($timestamp)). "\r\n" +        . "Message-ID: ". sprintf("<%.10f\@example.net>", $timestamp)."\r\n" +        . 
"\r\n"; + +    my $len = 1 + int(rand(4095)); +    my $body = unpack("H*", urandom($len)); +    for (my $i = 0; $i < 2*$len; $i += 70) { +        print $out substr($body, $i, 70), "\r\n"; +    } +    close($out); +} diff --git a/benchmark/run b/benchmark/run new file mode 100755 index 0000000..4a83c68 --- /dev/null +++ b/benchmark/run @@ -0,0 +1,506 @@ +#!/bin/bash + +set -ue +PATH="/usr/sbin:/usr/bin:/sbin:/bin" +export PATH + +unset CHROOT NETNS +cleanup() { +    if [ "${CHROOT+x}" ]; then +        schroot --end-session --chroot="$CHROOT" +    fi +    if [ "${NETNS+x}" ]; then +        ip netns del "$NETNS" +    fi +} +trap cleanup EXIT INT TERM + +# create new CHROOT +DEB_BUILD_ARCH="$(dpkg-architecture -qDEB_BUILD_ARCH)" +CHROOT="$(schroot -c "unstable-$DEB_BUILD_ARCH-sbuild" -b)" +ROOTDIR="/run/schroot/mount/$CHROOT" + +# create new network namespace and place counters to measure network usage +ip netns add "${NETNS:="interimap-benchmark"}" +ip netns exec "$NETNS" nft -f- <<- EOF +	flush ruleset +	table inet filter { +	    counter interimap-in { } +	    counter interimap-out { } +	    counter offlineimap-in { } +	    counter offlineimap-out { } +	    chain input { +	        type filter hook input priority 0 +	        iif "lo" ip daddr 127.0.0.1 tcp dport 10143 counter name interimap-in +	        iif "lo" ip daddr 127.0.0.1 tcp dport 10144 counter name offlineimap-in +	    } +	    chain output { +	        type filter hook output priority 0 +	        oif "lo" ip saddr 127.0.0.1 tcp sport 10143 counter name interimap-out +	        oif "lo" ip saddr 127.0.0.1 tcp sport 10144 counter name offlineimap-out +	    } +	} +EOF +ip netns exec "$NETNS" ip addr add "127.0.0.1" dev "lo" +ip netns exec "$NETNS" ip link set "lo" up + +# resize the partition so it can hold the mail stores (you may want to +# turn swap off too) +mount -o"remount,size=15G" "/run/schroot/mount/$CHROOT/dev/shm" + +# install dependencies +schroot --directory="/" --chroot="$CHROOT" -r -- \ +    env 
DEBIAN_FRONTEND="noninteractive" apt-get install \ +        --no-install-recommends --assume-yes \ +            time dovecot-imapd offlineimap \ +            libconfig-tiny-perl libdbd-sqlite3-perl libnet-ssleay-perl \ +            libcrypt-urandom-perl procps + +# run a command in the chroot +jail() { +    local user="" home="" +    case "${u:-local}" in +        local) user="nobody"; home="/dev/shm/nobody";; +        remote) user="user"; home="/dev/shm/vmail/user";; +    esac +    ip netns exec "$NETNS" \ +        schroot --directory="/dev/shm/nobody" --user="nobody" --chroot="$CHROOT" -r \ +            -- env -i PATH="/usr/bin:/bin" USER="$user" HOME="$home" "$@" +} + +# run a command in the chroot in a monitored fashion +jail_stat() { +    local e U S M P u="local" +    local counters="$ROOTDIR/tmp/counters.$1.json" +    ip netns exec "$NETNS" nft reset counter inet filter "$1-in" >/dev/null +    ip netns exec "$NETNS" nft reset counter inet filter "$1-out" >/dev/null + +    jail time --format="%e\\t%U\\t%S\\t%M\\t%P" --output="/tmp/time.$1" \ +        -- "$@" >/dev/null || true +    IFS=$'\t' read e U S M P <"$ROOTDIR/tmp/time.$1" + +    local i="${NAME:-$1}" +    local a="${i#* }" +    [ "$a" = "$i" ] && a="   " || a=" $a" + +    printf "%11s%s" "${i%% *}" "$a" +    printf "  %5.2fs  %6.2fs" "$U" "$S" +    if [ "${IDLE:-n}" = "n" ]; then +        printf "  %5.2fs  %4s" "$e" "$P" +    fi +    printf "  %8s" "${M}k" + +    ip netns exec "$NETNS" nft -j list counters | jq ".nftables +        | map(select(.counter) | .counter | { key: .name, value: {packets, bytes} }) +        | from_entries" >"$counters" + +    local ib ip ob op +    ib="$(bytes "$(jq ".\"$1-in\".bytes"  <"$counters")")" +    ob="$(bytes "$(jq ".\"$1-out\".bytes" <"$counters")")" +    ip="$(_units "$(jq ".\"$1-in\".packets"  <"$counters")" 1000)" +    op="$(_units "$(jq ".\"$1-out\".packets" <"$counters")" 1000)" +    printf "  %8s / %-7s" "$ob" "$ib" # inverse for the client's perspective +    
printf "  %8s / %-7s" "$op" "$ip" +    printf "\\n" +} + +# display metrics headers +headers() { +    declare -a h=("  user" " system") +    if [ "${IDLE:-n}" = "n" ]; then +        h+=( "  real" " CPU" ) +    fi +    h+=( " max RSS" " traffic (in/out) " " packets (in/out) " ) +    local x="offlineimap -q" i + +    printf "%s" "${x//?/ }" +    for i in "${h[@]}"; do +        printf "  %s" "$i" +    done +    printf "\\n" + +    printf "%s" "${x//?/-}" +    for i in "${h[@]}"; do +        printf "  %s" "${i//?/-}" +    done +    printf "\\n" +} + +# install Dovecot's "system" configuration and start the server +install -onobody -gnogroup -m0700 --directory \ +    "$ROOTDIR/dev/shm/dovecot" \ +    "$ROOTDIR/dev/shm/vmail" \ +    "$ROOTDIR/dev/shm/nobody" + +install -onobody -gnogroup -m0644 \ +    "./benchmark/dovecot.conf" \ +    "$ROOTDIR/dev/shm/dovecot/config" +jail /usr/sbin/dovecot -c"/dev/shm/dovecot/config" + +install -onobody -gnogroup -m0600 /dev/null \ +    "$ROOTDIR/dev/shm/dovecot/users" +PASSWORD="$(xxd -l16 -p </dev/urandom)" +printf "%s:%s:::::\\n" "user" "$PASSWORD" \ +    >"$ROOTDIR/dev/shm/dovecot/users" + +# install user configuration for Dovecot, interimap, and offlineimap +cat >"$ROOTDIR/dev/shm/nobody/.dovecot.conf" <<-EOF +	log_path = /dev/null +	mail_home = /dev/shm/nobody +	mail_location = maildir:~/Maildir +	ssl = no +EOF + +install -onobody -gnogroup -Dm0700 --directory \ +    "$ROOTDIR/dev/shm/nobody/.config/interimap" \ +    "$ROOTDIR/dev/shm/nobody/.local/share" + +cat >"$ROOTDIR/dev/shm/nobody/.config/interimap/config" <<-EOF +	database = bench.db + +	[local] +	type = tunnel +	command = doveadm -c/dev/shm/nobody/.dovecot.conf exec imap +	null-stderr = YES + +	[remote] +	type = imap +	host = 127.0.0.1 +	port = 10143 +	STARTTLS = no +	username = user +	password = $PASSWORD +EOF + +cat >"$ROOTDIR/dev/shm/nobody/.offlineimaprc" <<-EOF +	[general] +	accounts = bench + +	[Account bench] +	localrepository = local +	remoterepository = remote + 
+	[Repository local] +	type = Maildir +	localfolders = ~/Maildir2 + +	[Repository remote] +	type = IMAP +	remotehost = 127.0.0.1 +	remotepass = $PASSWORD +	remoteport = 10144 +	remoteuser = user +	ssl = no +	starttls = no +	# keep the default (no) as it doesn't seem to work with large mailboxes, perhaps +	# due to https://dovecot.org/pipermail/dovecot/2019-November/117522.html +	#usecompression = yes +EOF + +# install interimap's development version +install -oroot -groot -m0755 -Dt "/$ROOTDIR/usr/bin" \ +    ./interimap ./benchmark/random_maildir.pl +install -oroot -groot -Dm0644 \ +    ./lib/Net/IMAP/InterIMAP.pm "$ROOTDIR/usr/share/perl5/Net/IMAP/InterIMAP.pm" + +# create a random mail store at mdbox:~/mail.back +prepare() { +    local u="remote" d m n seqs +    local maildir="/dev/shm/vmail/user/maildir" + +    clear +    jail rm -rf -- "$maildir" "/dev/shm/vmail/user/mail.back" + +    for m in "${!MAILBOXES[@]}"; do +        [ "${m^^[a-z]}" = "INBOX" ] && d="$maildir" || d="$maildir/.$m" +        jail mkdir -p -- "$d" +        # create 20% more; will be deleted afterwards (having only +        # contiguous UIDs might bias the metrics) +        n="${MAILBOXES["$m"]}" +        jail random_maildir.pl "$d" $((n+n/5)) +    done + +    # convert to mdbox +    jail doveadm -c"/dev/shm/dovecot/config" -omail_location="maildir:~/maildir" \ +        sync "mdbox:~/mail.back" +    jail rm -rf -- "$maildir" + +    # expunge 20% and purge +    for m in "${!MAILBOXES[@]}"; do +        n="${MAILBOXES["$m"]}" +        seqs="$(shuf -n $((n/5)) -i"1-$n")" +        jail doveadm -c"/dev/shm/dovecot/config" -omail_location="mdbox:~/mail.back" \ +            expunge mailbox "$m" "${seqs//$'\n'/,}" +    done +    jail doveadm -c"/dev/shm/dovecot/config" -omail_location="mdbox:~/mail.back" purge +} + +# populate a clientn from backup mailstore mdbox:~/mail.back (copied to +# avoid recreating / conversion) +populate() { +    local m u="remote" cmd +    clear + +    if [ 
"${KEEP_BACKUP:-y}" = "n" ]; then +        jail mv -T "/dev/shm/vmail/user/mail.back" "/dev/shm/vmail/user/mail" +    else +        jail cp -aT "/dev/shm/vmail/user/mail.back" "/dev/shm/vmail/user/mail" +    fi + +    # force dovecot to index and compute the state, otherwise the first +    # thing to query might be disadvantaged +    jail doveadm -c"/dev/shm/dovecot/config" index "INBOX" +    jail doveadm -c"/dev/shm/dovecot/config" mailbox status "all" "*" >/dev/null + +    u="local" +    # initial configuration +    for cmd in "$@"; do +        case "$cmd" in +            interimap) jail interimap --quiet 2>/dev/null;; +            offlineimap) jail offlineimap -u quiet 2>/dev/null;; +            *) exit 1;; +        esac +    done +} + +# remove interimap / offlineimap database and mail store (but keep +# mdbox:~/mail.back) +clear() { +    jail rm -rf -- \ +        "/dev/shm/vmail/user/mail" \ +        "/dev/shm/nobody/.local/share/interimap/bench.db" \ +        "/dev/shm/nobody/.offlineimap" \ +        "/dev/shm/nobody/Maildir" \ +        "/dev/shm/nobody/Maildir2" +} + +# pretty print a number in k/M/G/T etc +_units() { +    local n=$(( $1 )) b="$2" s u="" +    [ $b -eq 1024 ] && s="i" || s="" +    while [ ${#n} -gt 4 ]; do +        case "$u" in +            "") u="k";; +            k) u="M";; +            M) u="G";; +            G) u="T";; +            *) break;; +        esac +        n=$((n/b)) +    done +    printf "%d%s" "$n" "${u:+$u$s}" +} +bytes() { printf "%sB" "$(_units "$1" 1024)"; } + +# generate and deliver a random message +sample_message() { +    local date="$(date +"%s.%N")" +    cat <<-EOF +		From: <sender@example.net> +		To: <recipient@example.net> +		Date: $(date -R -d@"$date") +		Message-ID: <$date@example.net> + +	EOF +    xxd -ps -c30 -l2048 /dev/urandom # 4165 bytes +} +deliver() { +    local m="$1" u="remote" +    jail doveadm -c"/dev/shm/dovecot/config" exec dovecot-lda -e -m "$m" +} + +# write down markdown title +title() { +    local 
x="$1" h="$2" +    printf "\\n%s\\n%s\\n" "$h" "${h//?/$x}" +} + +# run benchmark for `interimap` / `offlineimap -q` / `offlineimap`: +# populate, run optional actions (such as delivery), then sync again in +# a monitored fashion +run-all() { +    local a cmd u q="" NAME="" +    for cmd in "interimap" "offlineimap -q" "offlineimap"; do +        populate "${cmd%% *}" +        case "${cmd%% *}" in +            interimap) q="--quiet";; +            offlineimap) q="-u quiet";; +            *) exit 1; +        esac +        for a in "$@"; do "$a"; done +        NAME="$cmd" jail_stat $cmd $q 2>/dev/null +    done +} + + +echo; echo +title "=" "Single mailbox" + +cat <<-EOF + +	We create a mailbox on the remote server, populate it with a number of +	messages, and synchronize it locally.  We then collect metrics for no-op +	synchronization (i.e., of mailboxes that are already in sync), and +	reconciliation after receiving a *single* message on the remote server. +EOF + +# generate a message to be used in *all* "Single mailbox" tests +sample_message >"$ROOTDIR/tmp/msg1" +activity1() { +    deliver "inbox" <"$ROOTDIR/tmp/msg1" +} + + +declare -A MAILBOXES +for n in 100 1000 10000 100000; do +    title "-" "$n messages" +    MAILBOXES=( ["inbox"]="$n" ) +    prepare + +    printf "\\n### %s ###\\n\\n" "No-op (in sync)" +    headers +    run-all + +    printf "\\n### %s ###\\n\\n" "Reconciliation" +    headers +    run-all activity1 +done + + +m=75 +echo; echo +title "=" "$m mailboxes" + +cat <<-EOF + +	We create $m mailboxes on the remote server, populate them with an equal +	number of messages, and synchronize them locally.  We then collect +	metrics for no-op synchronization (i.e., of mailboxes that are already +	in sync), and reconciliation after the following changes are being +	applied to the remote server: + +	  - 3 *new* messages (two on mailbox #2, one on mailbox #3); and +	  - 5 existing messages *EXPUNGEd* (two on mailboxes #3 and #4, one on +	    mailbox #5). 
+EOF + +# generate more messages to be used in *all* "$m mailboxes" tests +sample_message >"$ROOTDIR/tmp/msg2" +sample_message >"$ROOTDIR/tmp/msg3" + +activity2() { +    local u="remote" +    deliver "mailbox2" <"$ROOTDIR/tmp/msg1" +    deliver "mailbox2" <"$ROOTDIR/tmp/msg2" +    deliver "mailbox3" <"$ROOTDIR/tmp/msg3" +    # intentionally modify the remote only because not all local backend speak IMAP +    jail doveadm -c"/dev/shm/dovecot/config" expunge mailbox "mailbox3" "1:2" +    jail doveadm -c"/dev/shm/dovecot/config" expunge mailbox "mailbox4" "1,3" +    jail doveadm -c"/dev/shm/dovecot/config" expunge mailbox "mailbox5" "*" +} + +for n in 100 1000 10000; do +    title "-" "$n messages per mailbox" + +    MAILBOXES=( ["inbox"]="$n" ) +    for ((i=2; i<=$m; i++)); do +        MAILBOXES["mailbox$i"]="$n" +    done +    prepare + +    printf "\\n### %s ###\\n\\n" "No-op (in sync)" +    headers +    run-all + +    printf "\\n### %s ###\\n\\n" "Reconciliation" +    headers +    run-all activity2 +done + + +title "=" "Live synchronization" +timeout=$((6 * 3600)) +step=5 + +MAILBOXES=( ["inbox"]=100000 ["xlarge"]=100000 ) +for ((i=0; i<10; i++)); do +    MAILBOXES["large$i"]=10000 +done +for ((i=0; i<20; i++)); do +    MAILBOXES["medium$i"]=5000 +done +for ((i=0; i<45; i++)); do +    MAILBOXES["small$i"]=2000 +done +for ((i=0; i<20; i++)); do +    MAILBOXES["xsmall$i"]=500 +done + +n=0 +for i in "${MAILBOXES[@]}"; do +    n=$(( n + i )) +done + +cat <<-EOF + +	${#MAILBOXES[@]} mailboxes, $n messages in total: + +	  - 2 with 100000 messages; +	  - 10 with 10000 messages; +	  - 20 with 5000 messages; +	  - 45 with 2000 messages; and +	  - 20 with 500 messages. 
+ +	The two local mail stores (respectively for [InterIMAP] and +	[OfflineIMAP]) are initially in sync with the remote server, and we keep +	long-running “autorefresh” synchronization processes alive for 6h, with +	updates being regularly applied to the remote server: every $step seconds, + +	  - a new message is delivered to a random mailbox with 5% probability +	    (once every $((20*step))s on average); +	  - a random message is EXPUNGEd with 5% probability (once every $((20*step))s on +	    average); and +	  - a random message is marked as seen with 10% probability (once every +	    $((10*step))s on average). + +	\`interimap\` is configured to sync every *30s*.  \`offlineimap\` is +	configured to quick sync very *30s*, with a regular sync every *1h*. + +EOF + +IDLE="y" headers +prepare +KEEP_BACKUP="n" populate "interimap" "offlineimap" + +IDLE="y" jail_stat interimap --quiet --watch=30 2>/dev/null & +IDLE="y" jail_stat offlineimap -u quiet -k "Account_bench:autorefresh=0.5" \ +    -k "Account_bench:quick=120" 2>/dev/null & + +u="remote" +timeout=$(( $(date +%s) + timeout )) +while [ $(date +%s) -lt $timeout ]; do +    n="$(shuf -n1 -i1-100)" +    if [ $n -le 5 ]; then +        # deliver to a random mailbox on the remote +        m="$(shuf -n1 -e -- "${!MAILBOXES[@]}")" +        sample_message | deliver "$m" +    fi +    n="$(shuf -n1 -i1-100)" +    if [ $n -le 5 ]; then +        # expunge a random message on the remote +        read guid uid < <(jail doveadm -c"/dev/shm/dovecot/config" search all | shuf -n1) +        jail doveadm -c"/dev/shm/dovecot/config" expunge mailbox-guid "$guid" uid "$uid" +    fi +    n="$(shuf -n1 -i1-100)" +    if [ $n -le 10 ]; then +        # mark a random message as seen +        read guid uid < <(jail doveadm -c"/dev/shm/dovecot/config" search all | shuf -n1) +        jail doveadm -c"/dev/shm/dovecot/config" flags add "\\Seen" mailbox-guid "$guid" uid "$uid" +    fi +    sleep $step +done + +jail pkill -TERM -u"nobody" -s0 
interimap +sleep 0.2 # give a chance to print the stats +jail pkill -SIGABRT -u"nobody" -s0 offlineimap +wait diff --git a/doc/benchmark.md b/doc/benchmark.md new file mode 100644 index 0000000..72f51a4 --- /dev/null +++ b/doc/benchmark.md @@ -0,0 +1,279 @@ +% InterIMAP benchmark metrics and comparison +% [Guilhem Moulin](mailto:guilhem@fripost.org) + +The [IMAP `QRESYNC` extension][RFC 7162] allows efficient mailbox +synchronization, in terms of I/O as well as CPU usage.  In this document +we give some benchmark metrics to compare [InterIMAP]'s network usage with +so-called full synchronization solutions such as [OfflineIMAP].  The +timings are to be taken with a grain of salt, though: they likely won't +reflect real-world situations as the emails are stored in RAM for this +benchmark, and all network access is on the loopback interface.  (Moreover +neither SSL/TLS nor STARTTLS are being used in the below.  They would add +another 2-3 round-trips per connection.) + +These metrics show how [InterIMAP] scales linearly with the number of +*mailboxes* — pretty much regardless of how many messages they contain (at +least as long as the server can cope with large mailboxes) — while +[OfflineIMAP] scales with the number of *messages* on active mailboxes. + +While [InterIMAP] performs significantly better (especially given that it +can be relied upon to synchronize flag changes, unlike [OfflineIMAP]'s +“quick” mode), it should be noted that efficiency comes at the expense of +flexibility.  In particular it's not possible to exclude old messages from +synchronization (mailboxes can be excluded but finer granularity is not +possible).  And of course not all IMAP servers support [`QRESYNC`][RFC 7162] +and other extensions [InterIMAP] requires.  Furthermore [InterIMAP] is +single threaded and doesn't use pipelining at the moment.  
(Concurrency +opens a can of worms, and given the below metrics it simply doesn't seem +worth the trouble ☺) + +----------------------------------------------------------------------- + +The script used to compute these metrics can be found [there][benchmark-script]. +We use [Dovecot] as IMAP server; the “remote” mailbox store is in +[multi-dbox][dbox] format (initially populated with random messages of average +size ~4kiB, and randomly pruned to avoid having only contiguous UIDs) while +[maildir] is used “locally”.  The configuration files were not tuned for +performance (however [InterIMAP] takes advantage of Dovecot's support of the +[IMAP `COMPRESS` extension][RFC 4978] as it is its default behavior). + +The *user* (resp. *system*) column denotes the number of CPU-seconds +used by the process in user (resp. kernel) mode.  The *real* column is +the elapsed real (wall clock) time.  Network measurements are obtained +by placing packet counters on the interface. + +[RFC 4978]: https://tools.ietf.org/html/rfc4978 +[RFC 7162]: https://tools.ietf.org/html/rfc7162 +[InterIMAP]: interimap.1.html +[OfflineIMAP]: https://www.offlineimap.org/ +[benchmark-script]: https://git.guilhem.org/interimap/plain/benchmark/run +[Dovecot]: https://dovecot.org +[dbox]: https://wiki.dovecot.org/MailboxFormat/dbox +[maildir]: https://wiki.dovecot.org/MailboxFormat/Maildir + +----------------------------------------------------------------------- + +Single mailbox  {#single-mailbox} +============== + +We create a mailbox on the remote server, populate it with a number of +messages, and synchronize it locally.  We then collect metrics for no-op +synchronization (i.e., of mailboxes that are already in sync), and +reconciliation after receiving a *single* message on the remote server. + +[OfflineIMAP]'s network usage remains low in “quick” mode for large +mailboxes that are already in sync, but as soon as a mail arrives the +performance degrades by *several orders of magnitude*.  
On the other +hand [InterIMAP] has very little overhead on large mailboxes (also +memory-wise), and when a message is delivered there is barely more +traffic than what's required for the transfer of said message. + +100 messages +------------ + +### No-op (in sync) ### + +                  user   system    real   CPU   max RSS   traffic (in/out)    packets (in/out) +--------------  ------  -------  ------  ----  --------  ------------------  ------------------ +  interimap      0.05s    0.01s   0.07s   85%    21368k     1439B / 1017B          13 / 15 +offlineimap -q   0.04s    0.01s   0.27s   23%    19748k     2497B / 1236B          16 / 20 +offlineimap      0.05s    0.01s   0.32s   22%    19268k     10kiB / 1456B          21 / 23 + +### Reconciliation ### + +                  user   system    real   CPU   max RSS   traffic (in/out)    packets (in/out) +--------------  ------  -------  ------  ----  --------  ------------------  ------------------ +  interimap      0.06s    0.00s   0.08s   83%    21116k     4516B / 1412B          17 / 19 +offlineimap -q   0.06s    0.00s   0.32s   22%    19968k     15kiB / 1670B          23 / 26 +offlineimap      0.06s    0.00s   0.32s   22%    18616k     14kiB / 1284B          25 / 19 + +1000 messages +------------- + +### No-op (in sync) ### + +                  user   system    real   CPU   max RSS   traffic (in/out)    packets (in/out) +--------------  ------  -------  ------  ----  --------  ------------------  ------------------ +  interimap      0.05s    0.01s   0.07s   84%    21204k     1449B / 965B           13 / 14 +offlineimap -q   0.06s    0.01s   0.33s   24%    19068k     2664B / 1236B          19 / 20 +offlineimap      0.09s    0.02s   0.37s   30%    19868k     75kiB / 1508B          26 / 24 + +### Reconciliation ### + +                  user   system    real   CPU   max RSS   traffic (in/out)    packets (in/out) +--------------  ------  -------  ------  ----  --------  ------------------  ------------------ +  
interimap      0.06s    0.00s   0.08s   78%    21212k     4524B / 1333B          17 / 16 +offlineimap -q   0.08s    0.03s   0.33s   37%    22284k     80kiB / 1775B          29 / 28 +offlineimap      0.10s    0.01s   0.32s   36%    20116k     80kiB / 1597B          24 / 25 + +10000 messages +-------------- + +### No-op (in sync) ### + +                  user   system    real   CPU   max RSS   traffic (in/out)    packets (in/out) +--------------  ------  -------  ------  ----  --------  ------------------  ------------------ +  interimap      0.06s    0.00s   0.09s   75%    20980k     1449B / 965B           13 / 14 +offlineimap -q   0.10s    0.03s   0.37s   37%    36708k     2719B / 1184B          20 / 19 +offlineimap      0.50s    0.09s   0.78s   75%    45424k    746kiB / 2080B          37 / 35 + +### Reconciliation ### + +                  user   system    real   CPU   max RSS   traffic (in/out)    packets (in/out) +--------------  ------  -------  ------  ----  --------  ------------------  ------------------ +  interimap      0.06s    0.00s   0.12s   54%    21136k     4530B / 1205B          17 / 16 +offlineimap -q   0.51s    0.08s   0.76s   77%    42860k    751kiB / 2608B          43 / 44 +offlineimap      0.62s    0.16s   0.88s   89%    47996k    750kiB / 2222B          38 / 37 + +100000 messages +--------------- + +### No-op (in sync) ### + +                  user   system    real   CPU   max RSS   traffic (in/out)    packets (in/out) +--------------  ------  -------  ------  ----  --------  ------------------  ------------------ +  interimap      0.06s    0.00s   0.16s   38%    21080k     1441B / 1017B          13 / 15 +offlineimap -q   1.06s    0.10s   1.40s   83%   201376k     2722B / 1236B          20 / 20 +offlineimap      4.88s    0.83s   5.23s  109%   280716k   7626kiB / 5564B         138 / 102 + +### Reconciliation ### + +                  user   system    real   CPU   max RSS   traffic (in/out)    packets (in/out) +--------------  ------  -------  
------  ----  --------  ------------------  ------------------ +  interimap      0.06s    0.00s   0.48s   15%    22876k     4532B / 1362B          17 / 19 +offlineimap -q   5.09s    0.75s   5.38s  108%   277336k   7637kiB / 9941B         261 / 185 +offlineimap      4.92s    0.76s   5.22s  108%   279592k   7631kiB / 5603B         144 / 102 + +----------------------------------------------------------------------- + +75 mailboxes  {#multi-mailbox} +============ + +We create 75 mailboxes on the remote server, populate them with an equal +number of messages, and synchronize them locally.  We then collect +metrics for no-op synchronization (i.e., of mailboxes that are already +in sync), and reconciliation after the following changes are being +applied to the remote server: + +  - 3 *new* messages (two on mailbox #2, one on mailbox #3); and +  - 5 existing messages *EXPUNGEd* (two on mailboxes #3 and #4, one on +    mailbox #5). + +The results are not surprising given the metrics from the [above +section](#single-mailbox).  In “quick” mode [OfflineIMAP] still performs +reasonably well when the mailboxes are in sync (even though it iterates +through each mailbox and the extra roundtrips increase network traffic +compared to the single mailbox case), but performance decreases +significantly when a message is delivered to a large mailbox.  Once +again [InterIMAP] has very little network overhead regardless of mailbox +size; it does take longer on very large mailboxes, but the bottleneck is +the IMAP server ([InterIMAP] is just twiddling its thumbs waiting for Dovecot +to compute `STATUS` responses). 
+ +100 messages per mailbox +------------------------ + +### No-op (in sync) ### + +                  user   system    real   CPU   max RSS   traffic (in/out)    packets (in/out) +--------------  ------  -------  ------  ----  --------  ------------------  ------------------ +  interimap      0.06s    0.00s   0.12s   55%    21712k     1949B / 898B           11 / 13 +offlineimap -q   0.32s    0.08s   0.43s   92%    22400k     36kiB / 7260B          93 / 99 +offlineimap      0.97s    0.32s   1.32s   98%    22648k    606kiB / 19kiB         243 / 251 + +### Reconciliation ### + +                  user   system    real   CPU   max RSS   traffic (in/out)    packets (in/out) +--------------  ------  -------  ------  ----  --------  ------------------  ------------------ +  interimap      0.07s    0.00s   0.15s   53%    21860k     10kiB / 1634B          19 / 19 +offlineimap -q   0.34s    0.11s   0.59s   77%    21248k     81kiB / 8697B         109 / 117 +offlineimap      0.93s    0.35s   1.30s   98%    22804k    620kiB / 20kiB         252 / 253 + +1000 messages per mailbox +------------------------- + +### No-op (in sync) ### + +                  user   system    real   CPU   max RSS   traffic (in/out)    packets (in/out) +--------------  ------  -------  ------  ----  --------  ------------------  ------------------ +  interimap      0.05s    0.01s   0.31s   22%    22028k     1944B / 898B           11 / 13 +offlineimap -q   0.97s    0.22s   1.22s   97%    23920k     36kiB / 7000B          90 / 94 +offlineimap      4.87s    1.54s   5.01s  127%    25040k   5507kiB / 26kiB         393 / 388 + +### Reconciliation ### + +                  user   system    real   CPU   max RSS   traffic (in/out)    packets (in/out) +--------------  ------  -------  ------  ----  --------  ------------------  ------------------ +  interimap      0.08s    0.00s   0.29s   28%    22132k     10kiB / 1931B          20 / 19 +offlineimap -q   1.25s    0.32s   1.45s  108%    27276k    344kiB / 9038B      
   119 / 123 +offlineimap      4.72s    1.70s   5.05s  127%    26464k   5521kiB / 27kiB         399 / 392 + +10000 messages per mailbox +-------------------------- + +### No-op (in sync) ### + +                  user   system    real   CPU   max RSS   traffic (in/out)    packets (in/out) +--------------  ------  -------  ------  ----  --------  ------------------  ------------------ +  interimap      0.07s    0.00s   1.57s    4%    21896k     1942B / 898B           11 / 13 +offlineimap -q  12.10s    3.98s  11.67s  137%    58624k     37kiB / 10kiB          94 / 168 +offlineimap     55.49s   23.68s  51.50s  153%    70652k     54MiB / 57kiB        1072 / 996 + +### Reconciliation ### + +                  user   system    real   CPU   max RSS   traffic (in/out)    packets (in/out) +--------------  ------  -------  ------  ----  --------  ------------------  ------------------ +  interimap      0.08s    0.00s   1.73s    5%    23108k     10kiB / 1624B          20 / 23 +offlineimap -q  14.60s    5.22s  14.00s  141%    64988k   3028kiB / 15kiB         203 / 263 +offlineimap     57.24s   25.92s  53.72s  154%    76560k     54MiB / 89kiB        1981 / 1625 + +----------------------------------------------------------------------- + +Live synchronization  {#live-sync} +==================== + +97 mailboxes, 500000 messages in total: + +  - 2 with 100000 messages; +  - 10 with 10000 messages; +  - 20 with 5000 messages; +  - 45 with 2000 messages; and +  - 20 with 500 messages. 
+ +The two local mail stores (respectively for [InterIMAP] and +[OfflineIMAP]) are initially in sync with the remote server, and we keep +long-running “autorefresh” synchronization processes alive for 6h, with +updates being regularly applied to the remote server: every 5 seconds, + +  - a new message is delivered to a random mailbox with 5% probability +    (once every 100s on average); +  - a random message is EXPUNGEd with 5% probability (once every 100s on +    average); and +  - a random message is marked as seen with 10% probability (once every +    50s on average). + +`interimap` is configured to sync every *30s*.  `offlineimap` is +configured to quick sync every *30s*, with a regular sync every *1h*. + +                 user    system   max RSS   traffic (in/out)    packets (in/out) +-----------  --------  --------  --------  ------------------  ------------------ +  interimap    12.95s     0.26s    24276k    743kiB / 257kiB       2207 / 4143 +offlineimap  5327.79s  1495.78s   394044k    942MiB / 7840kiB       87k / 126k + +Long-lived synchronization for large and busy mail stores is where +[InterIMAP] truly shines, in terms of CPU as well as network usage. +(The amount of CPU time spent in kernel mode is so low because the +process spends most of its time sleeping or in blocking calls waiting +for the server to compute `STATUS` responses.  Smart servers like +Dovecot should cache states though, hence are able to serve these +responses quickly.)  Thanks to the [`QRESYNC`][RFC 7162]-based +synchronization there is no need for complex client-side computation, +nor for sending vast amounts of data over the network.  (To be fair, +while the amount of CPU time spent in user mode remains low, the local +IMAP server might do a bit of extra work which is not counted here.  But +here again caching helps avoid expensive directory traversal.)   
The +performance gain is most appreciated for battery-powered devices, as +well as devices behind slow and/or high-latency network connections ☺. +Moreover [InterIMAP] *does* synchronize flag updates at every step, while +[OfflineIMAP] normally skips these in “quick” mode so might *delay* flag +updates for up to one hour. diff --git a/doc/getting-started.md b/doc/getting-started.md index 371449d..e20b71d 100644 --- a/doc/getting-started.md +++ b/doc/getting-started.md @@ -26,8 +26,8 @@ not*.  Instead, InterIMAP needs an [IMAP4rev1] server on *both* peers  to synchronize.  This may sound like a severe limitation at first, but by  seeing both local and remote mail storage though the same “IMAP lens”,  InterIMAP is able to take advantage of the abstraction layer and -perform significant optimizations, yielding much faster synchronization. -(*TODO* link to benchmark.) +perform significant optimizations, yielding [much faster](benchmark.html) +synchronization.  *Note*: InterIMAP uses the [Quick Mailbox Resynchronization][RFC 7162]  extension for stateful synchronization, hence won't work on IMAP servers  that don't advertise support for that extension. @@ -226,8 +226,9 @@ update is requested every minute.  Thanks to the [`QRESYNC`][RFC 7162]  IMAP extension a status update scales linearly with the number of  mailboxes (unlike [OfflineIMAP] *not* with the number of messages).  And  thanks to the `COMPRESS` extension, the typical volume of data exchanged -is rather small (*TODO* metrics).  You may even want to override the -default settings and reduce the interval between status updates to 20s: +[is rather small](benchmark.html#live-sync).  
You may even want to +override the default settings and reduce the interval between status +updates to 20s:      $ mkdir -p ${XDG_CONFIG_HOME:-~/.config}/systemd/user/interimap.service.d  <!-- --> @@ -266,7 +267,7 @@ Other use-cases:  Benchmarks: -:   *TODO* +:   [Benchmark metrics and comparison](benchmark.html)  Manual diff --git a/doc/template.html b/doc/template.html index dbcc0e6..41bf3d7 100644 --- a/doc/template.html +++ b/doc/template.html @@ -20,6 +20,9 @@ $endif$      span.underline{text-decoration: underline;}      div.column{display: inline-block; vertical-align: top; width: 50%;}      pre{tab-size: 4; -moz-tab-size: 4;} +    table{width: 100%; margin-bottom: 3ex;} +    table > thead > tr.header > th{border-bottom: 2px solid #ddd; padding: 8px;} +    table > tbody > tr        > td{border-bottom: 1px solid #ddd; padding: 6px;}      @media only screen and (min-width: 600px) {        .parent {          float: right; | 
