#!/usr/bin/perl -T

#----------------------------------------------------------------------
# Pull mails from an IMAP mailbox and deliver them to an SMTP session
# Copyright © 2016-2020 Guilhem Moulin <guilhem@fripost.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#----------------------------------------------------------------------

use v5.20.2;
use strict;
use warnings;

our $VERSION = '0.5.6';
my $NAME = 'pullimap';

use Errno 'EINTR';
use Fcntl qw/O_CREAT O_RDWR O_DSYNC F_SETLK F_WRLCK SEEK_SET F_GETFD F_SETFD FD_CLOEXEC/;
use Getopt::Long qw/:config posix_default no_ignore_case gnu_getopt auto_version/;
use List::Util 'first';
use Socket qw/PF_INET PF_INET6 SOCK_STREAM IPPROTO_TCP/;

use Net::IMAP::InterIMAP 0.5.6 qw/xdg_basedir read_config compact_set/;

# Clean up PATH
$ENV{PATH} = join ':', qw{/usr/bin /bin};
delete @ENV{qw/IFS CDPATH ENV BASH_ENV/};

my %CONFIG;
sub usage(;$) {
    my $rv = shift // 0;
    if ($rv) {
        print STDERR "Usage: $NAME [OPTIONS] SECTION\n"
                    ."Try '$NAME --help' or consult the manpage for more information.\n";
    }
    else {
        print STDERR "Usage: $NAME [OPTIONS] SECTION\n"
                    ."Consult the manpage for more information.\n";
    }
    exit $rv;
}

usage(1) unless GetOptions(\%CONFIG, qw/config=s quiet|q debug+ help|h idle:i no-delivery/);
usage(0) if $CONFIG{help};
usage(1) unless $#ARGV == 0 and $ARGV[0] ne '_';


#######################################################################
# Read and validate configuration
#
my $CONF = do {
    my $conffile = delete($CONFIG{config}) // "config";
    $conffile = xdg_basedir( XDG_CONFIG_HOME => ".config", $NAME, $conffile );
    read_config( $conffile
               , [$ARGV[0]]
               , statefile => qr/\A(\P{Control}+)\z/
               , mailbox => qr/\A([\x01-\x7F]+)\z/
               , 'deliver-method' => qr/\A([ls]mtp:\[.*\]:\d+)\z/
               , 'deliver-ehlo' => qr/\A(\P{Control}+)\z/
               , 'deliver-rcpt' => qr/\A(\P{Control}+)\z/
               , 'purge-after' => qr/\A(\d*)\z/
               )->{$ARGV[0]};
};

my ($MAILBOX, $STATE);
do {
    $MAILBOX = $CONF->{mailbox} // 'INBOX';

    my $statefile = $CONF->{statefile} // $ARGV[0];
    die "Missing option statefile" unless defined $statefile;
    $statefile = xdg_basedir( XDG_DATA_HOME => ".local/share", $NAME, $statefile );

    my $mode = O_RDWR | O_DSYNC;
    # don't auto-create in long-lived mode
    $mode |= O_CREAT unless defined $CONFIG{idle};

    sysopen($STATE, $statefile, $mode, 0600) or die "Can't open $statefile: $!";
    # XXX we need to pack the struct flock manually: not portable!
    my $struct_flock = pack('s!s!l!l!i!', F_WRLCK, SEEK_SET, 0, 0, 0);
    fcntl($STATE, F_SETLK, $struct_flock) or die "Can't lock $statefile: $!";
    my $flags = fcntl($STATE, F_GETFD, 0)       or die "fcntl F_GETFD: $!";
    fcntl($STATE, F_SETFD, $flags | FD_CLOEXEC) or die "fcntl F_SETFD: $!";

    # We have no version number in the statefile, but if we ever need a
    # migration, we'll add a 1-byte header for the version number, and
    # assume version 1.0 if the size of the file is a multiple of 4
    # bytes. (We can also use the fact that bytes 5 to 8 are never all 0.)
};


#######################################################################

# Read a UID (32-bits integer) from the statefile, or undef if we're at
# the end of the statefile
sub readUID() {
    my $n = sysread($STATE, my $buf, 4) // die "read: $!";
    return if $n == 0; # EOF
    # file length is a multiple of 4 bytes, and we always read 4 bytes at a time
    die "Corrupted state file!" if $n != 4;
    unpack('N', $buf);
}

# Write a UID (32-bits integer) to the statefile
sub writeUID($) {
    my $uid = pack('N', shift);
    my $offset = 0;
    for ( my $offset = 0
        ; $offset < 4
        ; $offset += syswrite($STATE, $uid, 4-$offset, $offset) // die "write: $!"
    ) {}
    # no need to sync (or flush) since $STATE is opened with O_DSYNC
}


#######################################################################
# SMTP/LMTP part
#
my ($SMTP, $SMTP_PIPELINING);
sub sendmail($$) {
    my ($from, $rfc822) = @_;
    unless (defined $SMTP) {
        # TODO we need to be able to reconnect when the server closes
        # the connection due to a timeout (RFC 5321 section 4.5.3.2)
        my ($fam, $addr) = (PF_INET, $CONF->{'deliver-method'} // 'smtp:[127.0.0.1]:25');
        $addr =~ s/^([ls]mtp):// or die;
        my $ehlo = $1 eq 'lmtp' ? 'LHLO' : $1 eq 'smtp' ? 'EHLO' : die;
        $ehlo .= ' '. ($CONF->{'deliver-ehlo'} // 'localhost.localdomain');

        my $port = $addr =~ s/:(\d+)$// ? $1 : die;
        $addr =~ s/^\[(.*)\]$/$1/ or die;
        $fam = PF_INET6 if $addr =~ /:/;
        $addr = Socket::inet_pton($fam, $addr) // die "Invalid address $addr\n";
        my $sockaddr = $fam == PF_INET  ? Socket::pack_sockaddr_in($port,  $addr)
                     : $fam == PF_INET6 ? Socket::pack_sockaddr_in6($port, $addr)
                     : die;

        socket($SMTP, $fam, SOCK_STREAM, IPPROTO_TCP) or die "socket: $!";
        until (connect($SMTP, $sockaddr)) {
            next if $! == EINTR; # try again if connect(2) was interrupted by a signal
            die "connect: $!";
        }
        binmode($SMTP) // die "binmode: $!";

        smtp_resp('220');
        my @r = smtp_send($ehlo => '250');
        $SMTP_PIPELINING = grep {$_ eq 'PIPELINING'} @r; # SMTP pipelining (RFC 2920)
    }
    my $rcpt = $CONF->{'deliver-rcpt'} // getpwuid($>) // die;

    # return codes are from RFC 5321 section 4.3.2
    smtp_send( "MAIL FROM:<$from>" => '250'
             , "RCPT TO:<$rcpt>"   => '250'
             , "DATA"              => '354'
             );

    print STDERR "C: [...]\n" if $CONFIG{debug};
    if (!defined $$rfc822 or $$rfc822 eq "") {
        # RFC 5321 section 4.1.1.4: if there was no mail data, the first
        # "\r\n" ends the DATA command itself
        $SMTP->printflush("\r\n.\r\n") or die;
    } else {
        my $offset = 0;
        my $length = length($$rfc822);
        while ((my $end = index($$rfc822, "\r\n", $offset) + 2) != 1) {
            my $line = substr($$rfc822, $offset, $end-$offset);
            # RFC 5321 sec. 4.5.2: if the line starts with a dot, double it
            $line = ".".$line if substr($line, 0, 1) eq ".";
            $SMTP->print($line) or die;
            $offset = $end;
        }
        if ($offset < $length) {
            # the last line did not end with "\r\n"; add it in order to
            # have the receiving SMTP server recognize the "end of data"
            # condition.  See RFC 5321 sec. 4.1.1.4
            my $line = substr($$rfc822, $offset);
            $line = ".".$line if substr($line, 0, 1) eq ".";
            $SMTP->print($line, "\r\n") or die;
        }
        $SMTP->printflush(".\r\n") or die;
    }
    smtp_resp('250');
}
sub smtp_resp($) {
    my $code = shift;
    my @resp;
    while(1) {
        local $_ = $SMTP->getline() // die;
        s/\r\n\z// or die "Invalid SMTP reply: $_";
        print STDERR "S: $_\n" if $CONFIG{debug};
        s/\A\Q$code\E([ -])// or die "SMTP error: Expected $code, got: $_\n";
        push @resp, $_;
        return @resp if $1 eq ' ';
    }
}
sub smtp_send(@) {
    my (@cmd, @code, @r);
    while (@_) {
        push @cmd,  shift // die;
        push @code, shift // die;
    }
    if ($SMTP_PIPELINING) { # SMTP pipelining (RFC 2920)
        print STDERR (map {"C: $_\n"} @cmd) if $CONFIG{debug};
        $SMTP->printflush(map {"$_\r\n"} @cmd) or die;
        @r = smtp_resp($_) foreach @code;
    }
    else {
        foreach (@cmd) {
            print STDERR "C: $_\n" if $CONFIG{debug};
            $SMTP->printflush("$_\r\n") or die;
            @r = smtp_resp(shift(@code));
        }
    }
    return @r;
}


#######################################################################
# Initialize the cache from the statefile, then pull new messages from
# the remote mailbox
#
my $IMAP = do {
    my %config = (%$CONF, %CONFIG{qw/quiet debug/}, name => $ARGV[0]);
    $config{keepalive} = 1 if defined $CONFIG{idle};
    $config{'logger-prefix'} = "%?n?%?m?%n(%m)&%n?: ?";
    delete $config{mailbox}; # use SELECTed mailbox in log messages
    Net::IMAP::InterIMAP::->new( %config );
};

# Remove messages with UID < UIDNEXT and INTERNALDATE at most
# $CONF->{'purge-after'} days ago.
my $LAST_PURGED;
sub purge() {
    my $days = $CONF->{'purge-after'} // return;
    my ($uidnext) = $IMAP->get_cache('UIDNEXT');
    return unless $days ne '' and 1<$uidnext;
    my $set = "1:".($uidnext-1);

    unless ($days == 0) {
        my $now = time;
        return if defined $LAST_PURGED and $now - $LAST_PURGED < 43200; # purge every 12h
        $LAST_PURGED = $now;

        my @now = gmtime($now - $days*86400);
        my @m = qw/Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec/; # RFC 3501's date-month
        my $date = sprintf("%02d-%s-%04d", $now[3], $m[$now[4]], $now[5]+1900);
        my $ext = $IMAP->incapable('ESEARCH') ? undef : [qw/COUNT ALL/];
        my @uid = $IMAP->search((defined $ext ? "RETURN (".join(' ', @$ext).') ' : '')
                                ."UID $set BEFORE $date");
        my $count;
        if (defined $ext) {
            my ($uid_indicator, %resp) = @uid;
            $IMAP->panic() unless defined $uid_indicator and $uid_indicator = 'UID';
            $count = $resp{COUNT} // $IMAP->panic();
            $set = $resp{ALL}; # MUST NOT be present if there are no matches
        } else {
            $count = $#uid+1;
            $set = $count == 0 ? undef : compact_set(@uid);
        }
        $IMAP->log("Removing $count UID(s) $set") if $count > 0 and !$CONFIG{quiet};
    }

    if (defined $set) {
        $IMAP->silent_store($set, '+', '\Deleted');
        $IMAP->expunge($set);
    }

    # pull messages that have been received in the meantime
    pull() if $IMAP->has_new_mails($MAILBOX);
}

# Use BODY.PEEK[] so if something gets wrong, unpulled messages
# won't be marked as \Seen in the mailbox
my $ATTRS = "ENVELOPE INTERNALDATE";
$ATTRS .= " BODY.PEEK[]" unless $CONFIG{'no-delivery'};

my $RE_ATOM = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2F-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]+/;
my $DOT_STRING = qr/\A$RE_ATOM(?:\.$RE_ATOM)*\z/;
sub pull_callback($$) {
    my ($uids, $mail) = @_;
    return unless exists $mail->{RFC822} or $CONFIG{'no-delivery'}; # not for us

    my $uid = $mail->{UID};
    my $e = $mail->{ENVELOPE}->[3];
    my $sender = '';
    if (defined $e and defined (my $l = $e->[0]->[2]) and defined (my $d = $e->[0]->[3])) {
        if ($l =~ $DOT_STRING) {
            $sender = $l.'@'.$d;
        } elsif ($l =~ /\A[\x20-\x7E]*\z/) {
            # quote the local part if not Dot-string (RFC 5321)
            $l =~ s/([\x22\x5C])/\\$1/g; # escape double-quote and backslash
            $sender = '"'.$l.'"@'.$d;
        }
    }
    $IMAP->log("UID $uid from <$sender> ($mail->{INTERNALDATE})") unless $CONFIG{quiet};

    sendmail($sender, $mail->{RFC822}) unless $CONFIG{'no-delivery'};

    push @$uids, $uid;
    writeUID($uid);
}

# Pull new messages from IMAP and deliver them to SMTP, then update the
# statefile
sub pull(;$) {
    my $ignore = shift // [];
    my @uid;

    my $callback = sub($) { pull_callback(\@uid, shift) };

    do {
        # invariant: we're at pos 8 + 4*(1+$#ignore + 1+$#uids) in the statefile
        $IMAP->pull_new_messages($ATTRS, $callback, @$ignore);

        # now that everything has been deliverd, mark @ignore and @uid as \Seen
        $IMAP->silent_store(compact_set(@$ignore, @uid), '+', '\Seen') if @$ignore or @uid;
    }
    # repeat if we got a message in the meantime
    while ($IMAP->has_new_mails($MAILBOX));

    # terminate the SMTP transmission channel gracefully, cf RFC 5321 section 4.5.3.2
    smtp_send('QUIT' => '221') if defined $SMTP;
    undef $SMTP;

    # update the statefile
    my $p = sysseek($STATE, 4, SEEK_SET) // die "seek: $!";
    die "Couldn't seek to 4" unless $p == 4; # safety check
    my ($uidnext) = $IMAP->get_cache('UIDNEXT');
    writeUID($uidnext);
    truncate($STATE, 8) // die "truncate: $!";
}

do {
    my $uidvalidity = readUID();
    my $uidnext = readUID();
    my $ignore = [];

    $IMAP->set_cache($MAILBOX, UIDVALIDITY => $uidvalidity, UIDNEXT => $uidnext);
    $IMAP->select($MAILBOX);

    unless (defined $uidvalidity) {
        ($uidvalidity) = $IMAP->get_cache('UIDVALIDITY');
        # we were at pos 0 before the write, at pos 4 afterwards
        writeUID($uidvalidity);
        die if defined $uidnext; # sanity check
    }

    if (!defined $uidnext) {
        # we were at pos 4 before the write, at pos 8 afterwards
        writeUID(1);
    }
    else {
        # put the remaining UIDs in the @ignore list: these messages
        # have already been delivered, but the process exited before the
        # statefile was updated
        while (defined (my $uid = readUID())) {
            push @$ignore, $uid;
        }
    }
    pull($ignore);
    purge();
};
unless (defined $CONFIG{idle}) {
    $IMAP->logout();
    exit 0;
}

$CONFIG{idle} = 1740 if defined $CONFIG{idle} and $CONFIG{idle} == 0; # 29 mins
while(1) {
    pull() if $IMAP->idle($CONFIG{idle}, \&Net::IMAP::InterIMAP::has_new_mails);
    purge();
}