#! /usr/bin/perl -w

use strict;
use warnings;

use File::Temp qw(tempfile);
use Getopt::Long qw(:config posix_default no_ignore_case gnu_compat bundling
                    auto_version auto_help);
use IPC::Open2;
use IPC::Open3;
use POSIX qw(floor);
use Pod::Usage;

=head1 NAME

pdftool.pl - a PDF swiss army knife

=head1 SYNOPSIS

B<pdftool.pl> [-s I<pages>] [-w I<width>] [-h I<height>] [-p I<paper>]
[-W I<width>] [-H I<height>] [-P I<paper>] [-m I<margin>] [-b I<border>]
[-c] [--book] [--column] [-n I<number>] [-q] [I<infile> [I<outfile>]]

=head1 DESCRIPTION

I<pdftool.pl> combines the tools in the PSUtils bundle in a nice way.
The input should be either a Portable Document Format (PDF) file, or a
PostScript file. The output format is the same as the input one.

If no input file is given, or if a single hyphen-minus (B<->) is given as
file name, I<pdftool.pl> will read the PDF or PostScript data from the
standard input. In that case, and if the input data is in PDF format, an
auxiliary file will be created (since the conversion from PDF to PS
requires random access to the data), and removed afterwards. Also, if the
crop option (B<-c>) is set, an auxiliary file will be created, and removed
afterwards.

If no output file is given, or if a single hyphen-minus (B<->) is given as
file name, I<pdftool.pl> will send the data (of the same type as the
input) to the standard output.

The document will be treated as follows:

=over 8

=item Conversion from PDF to PostScript (if necessary),

=item Selection of the page range,

=item Rearranging pages for printing books or booklets,

=item Putting multiple pages per sheets,

=item Conversion from PS to PDF (if necessary).

=back

=head1 OPTIONS

=over 8

=item B<-s, --select>

Specifies the pages which are to be selected. I<pages> is a comma
separated list of page ranges, each of which may be a page number, or a
page range of the form I<first>-I<last>. If I<first> is omitted, the first
page is assumed, and if I<last> is omitted, the last page is assumed.

The prefix character `_' indicates that the page number is relative to the
end of the document, counting backwards. If just this character with no
page number is used, a blank page will be inserted.

=item B<-w, --width>

Specify the width of the output file. If the height is not specified as
well, it will be ignored. The known units are B<pt>, B<in>, B<cm> and
B<mm>. The default unit is B<pt>.

=item B<-h, --height>

Specify the height of the output file. If the width is not specified as
well, it will be ignored. The known units are B<pt>, B<in>, B<cm> and
B<mm>. The default unit is B<pt>.

=item B<-p, --paper>

Specify the paper size of the output file, as an alternative to B<-w> and
B<-h>. Can be set to B<a0>, B<a1>, B<a2>, B<a3>, B<a4>, B<a5>, B<b5>,
B<letter>, B<legal>, B<ledger>, B<tabloid>, B<statement>, B<executive>,
B<folio>, B<quarto>, or B<10x14>. The default output paper size is B<a4>.

=item B<-W, --Width>

Same as the option B<-w>, but for the input file. This option is useless
if the crop option (B<-c>) is set.

=item B<-H, --Height>

Same as the option B<-h>, but for the input file. This option is useless
if the crop option (B<-c>) is set.

=item B<-P, --Paper>

Same as the option B<-p>, but for the input file. By default,
I<pdftool.pl> will try to guess this value from the header of the file,
and fail if the information is missing. This option is useless if the crop
option (B<-c>) is set.

=item B<-b, --border>

Add a margin around each logical page on a sheet. Possible units are
B<pt>, B<in>, B<cm> and B<mm>. The default unit is B<pt>. The default
border is B<1cm> if the crop option (B<-c>) is set, and B<0> otherwise.

=item B<-m, --margin>

Add a margin around the whole page. Possible units are B<pt>, B<in>,
B<cm> and B<mm>. The default unit is B<pt>. The default margin is B<0>.

=item B<-c, --crop>

If this option is set, the PostScript code will be interpreted to
calculate the maximal effective bounding box. This operation may be quite
demanding for the CPU.

=item B<--book>

Rearrange pages for printing books or booklets.

=item B<-n, --nup>

Puts multiple logical pages onto each physical sheet of paper. The inner
margin might be same as the outer one (depending on the border option
B<-b>), see B<-m> for details. If I<number> is less than 10, the option
B<->I<number> may be used as an alternative.

=item B<--column>

Changes the order to `column-major', where successive pages are placed in
columns down the paper.

=item B<-q, --quiet>

I<pdftool.pl> normally prints the page numbers of the pages output; this
option suppresses this.

=item B<--help>

Display a brief help.

=item B<--version>

Display the version number of the I<pdftool.pl> program.

=item B<--man>

Display the manual page.

=back

=head1 EXAMPLES

The following command can be used to remotely crop a PDF file, convert it
to A4 paper, and rearrange the pages to print a booklet:

    ssh remote pdftool.pl -cpA4 --book -2 -b2cm -m-1cm < in.pdf > out.pdf

=head1 REQUIRE

Requires PSUtils installed and available in the command line
http://www.tardis.ed.ac.uk/~ajcd/psutils/. The script also runs the
external commands C<pdftops> and C<gs>.

=head1 AUTHOR

Public domain, (c) Guilhem Moulin.

=head1 VERSION

Version: 0.3, 27 December 2010

=cut

# TODO: inline it in the header
$main::VERSION = "0.3, 27 December 2010";

# Directory in which the auxiliary files are created
my $tmpdir = '/tmp';


#
# Options & arguments
#

my $select;
my ($outwidth, $outheight, $inwidth, $inheight);
my ($margin, $border);
my $crop;
my $book;
my $nup = 1;
my $column;
my $quiet;
my $man;

GetOptions(
    "select|s=s" => \$select,
    "w|width=s"  => \$outwidth,
    "h|height=s" => \$outheight,
    "p|paper=s"  => sub { papersize($_[1], \$outwidth, \$outheight) },
    "W|Width=s"  => \$inwidth,
    "H|Height=s" => \$inheight,
    "P|Paper=s"  => sub { papersize($_[1], \$inwidth, \$inheight) },
    "margin|m=s" => \$margin,
    "border|b=s" => \$border,
    "crop|c"     => \$crop,
    "book"       => \$book,
    "nup|n=i"    => \$nup,
    # -1 .. -9 are shortcuts for --nup=1 .. --nup=9
    (map { my $n = $_; ("$n" => sub { $nup = $n }) } 1 .. 9),
    "column"     => \$column,
    "q|quiet"    => \$quiet,
    "man"        => \$man
) or pod2usage(2);

pod2usage(2) if $#ARGV > 1;
pod2usage(-exitstatus => 0, -verbose => 2) if defined $man;

# Input and output files
my ($infile, $outfile) = @ARGV;


#
# Default values
#

# Default margin & border
$margin = 0 unless defined $margin;
unless (defined $border) {
    $border = defined $crop ? '1cm' : 0;
}

# Default output papersize
papersize("a4", \$outwidth, \$outheight)
    unless defined $outwidth and defined $outheight;

# Default unit: PostScript point
for my $length (\$outwidth, \$outheight, \$inwidth, \$inheight,
                \$margin, \$border) {
    topoints($length);
}

die "Margins are too big"
    if $outwidth <= $margin*2 or $outheight <= $margin*2;


# Open input and output files
if (defined $infile && $infile ne "-") {
    open FIN, '<', "$infile" or die "Can't read `$infile': $!";
}
else {
    undef $infile;
    *FIN = *STDIN;
}
# After the pipe, it won't be detected as seekable
my $inseek = seek(FIN, 0, 1) ? 1 : undef;

if (defined $outfile && $outfile ne "-") {
    open FOUT, '>', "$outfile" or die "Can't create `$outfile': $!";
}
else {
    *FOUT = *STDOUT;
}

*LOG = *STDERR;


#
# Detect filetype
#

# To avoid to seek into FIN, it gonna be copied from WRITE to READ in
# background, once the filetype has been read
# TODO: read specification to detect filetype properly
my $filetype;
pipe(*READ, *WRITE) or die "Can't pipe: $!";
while (!defined $filetype && defined(my $line = <FIN>)) {
    print WRITE $line or die "Can't print: $!";
    if ($line =~ /^%!PS/) {
        $filetype = "PS";
    }
    elsif ($line =~ /^%PDF/) {
        $filetype = "PDF";
    }
}
die "Cannot recognise FileType" unless defined $filetype;

{
    my $copier = fork;
    die "Can't fork: $!" unless defined $copier;
    if ($copier == 0) {
        # Child: cat FIN > WRITE in background
        close READ or die "Can't close: $!";
        while (<FIN>) {
            print WRITE $_ or die "Can't print: $!";
        }
        exit;
    }
}

# Parent
close WRITE or die "Can't close: $!";
close FIN   or die "Can't close: $!";
open(*FIN, "<&READ") or die "Can't dup: $!";


# Auxiliary files, to remove
my @auxfiles;

# Pids, to wait for
my @pids;


#
# Conversion from PDF to PS, if necessary
#

my @cmd;
if ($filetype eq "PDF") {
    unless (defined $infile && $inseek) {
        # Need to copy the whole input to an auxiliary file, since
        # conversion from PDF to PS requires random access to the data.
        # tempfile() picks an unpredictable name and creates the file
        # exclusively; it dies by itself on failure.
        my $auxfh;
        ($auxfh, $infile) = tempfile("pdftool-stdin-XXXXXX",
                                     DIR    => $tmpdir,
                                     SUFFIX => lc ".$filetype");
        push @auxfiles, "$infile";

        # cat FIN > $infile
        while (<FIN>) {
            print {$auxfh} $_ or die "Can't print: $!";
        }
        close $auxfh or die "Can't close: $!";
    }

    my ($first, $last);
    if (defined $select) {
        # Convert to PS only the pages we are interested in. This is only
        # attempted when every range is made of plain page numbers: pages
        # relative to the end of the document (`_N') or anything else we
        # do not understand is left to psselect, on the whole document.
        my @ranges;
        my $plain = 1;
        for my $range (split /,/, $select, -1) {
            if ($range =~ /^ *((?:[1-9]\d*)?) *(-?) *((?:[1-9]\d*)?) *$/
                && length("$1$2$3") && (length($2) || !length($3))) {
                push @ranges, [$1, $2, $3];
            }
            else {
                $plain = 0;
                last;
            }
        }

        if ($plain && @ranges) {
            my ($lowest, $highest, $open_start, $open_end);
            for my $range (@ranges) {
                my ($rmin, $dash, $rmax) = @$range;
                # `-N' starts at the first page, `N-' ends at the last one
                $open_start = 1 if $dash && !length($rmin);
                $open_end   = 1 if $dash && !length($rmax);
                for my $page (grep { length $_ } $rmin, $rmax) {
                    $lowest  = $page if !defined $lowest  || $page < $lowest;
                    $highest = $page if !defined $highest || $page > $highest;
                }
            }
            $first = $lowest  unless $open_start;
            $last  = $highest unless $open_end;

            # Calculate the new page range: once pdftops has dropped the
            # pages before $first, the page numbers are shifted
            my $offset = defined $first ? $first - 1 : 0;
            my @newselect;
            for my $range (@ranges) {
                my ($rmin, $dash, $rmax) = @$range;
                my $r = "";
                $r .= $rmin - $offset if length $rmin;
                $r .= $dash;
                $r .= $rmax - $offset if length $rmax;
                push @newselect, $r;
            }
            $select = join ',', @newselect;
        }
    }

    # Convert to PS
    @cmd = ('pdftops', '-origpagesizes', "$infile", '-');
    push @cmd, '-f', $first if defined $first;
    push @cmd, '-l', $last  if defined $last;
    push @cmd, '-q' if defined $quiet;

    my $pid = open(*PSIN, "-|", @cmd)
        or die "Can't run `" . printcmd(@cmd) . "'";
    push @pids, [$pid, @cmd];
}
else {
    open(*PSIN, "<&FIN") or die "Can't dup: $!";
}

open(*IN, "<&PSIN") or die "Can't dup: $!";


#
# Select, if necessary
#

if (defined $select) {
    @cmd = ('psselect', "-p$select");
    push @cmd, '-q' if defined $quiet;

    my $pid = open2(*OUT, "<&IN", @cmd);
    push @pids, [$pid, @cmd];
    open(*IN, "<&OUT") or die "Can't dup: $!";
}


#
# Bounding box
#

my @bbox;
if (defined $crop) {
    # Calculate the maximal bounding box

    unless (seek IN, 0, 1) {
        # The input is not seekable: have to create a seekable auxiliary
        # file (see the PDF auxiliary file above about tempfile())
        my ($auxfh, $auxfile) = tempfile("psresize-stdin-XXXXXX",
                                         DIR    => $tmpdir,
                                         SUFFIX => '.ps');
        push @auxfiles, $auxfile;

        # cat IN > $auxfile
        while (<IN>) {
            print {$auxfh} $_ or die "Can't print: $!";
        }
        close $auxfh or die "Can't close: $!";
        close IN     or die "Can't close: $!";

        open IN, '<', "$auxfile" or die "Can't read `$auxfile': $!";
    }

    # Need to duplicate IN, since it will be closed in the parent process
    open(*GSIN, '<&IN') or die "Can't dup: $!";

    # Each page's bounding box is read back from gs through GSOUT
    @cmd = ('gs', '-sDEVICE=bbox', '-dBATCH', '-dNOPAUSE', '-');
    my $pid = open3("<&GSIN", ">&GSOUT", *GSOUT, @cmd);

    my ($p, $c) = (0, 0);    # Page & character counter
    my ($x0, $y0, $x1, $y1) = (1<<16, 1<<16, -(1<<16), -(1<<16));
    while (<GSOUT>) {
        if ($_ =~ m/^\%\%BoundingBox: (\d+) (\d+) (\d+) (\d+)/) {
            # Keep the smallest box that contains every page's box
            $x0 = $1 if $1 < $x0;
            $y0 = $2 if $2 < $y0;
            $x1 = $3 if $3 > $x1;
            $y1 = $4 if $4 > $y1;
            unless (defined $quiet) {
                # Progress report, wrapped at 80 characters
                my $s = "[" . ++$p . "] ";
                $c += length $s;
                if ($c >= 80) {
                    print LOG "\n" or die "Can't print: $!";
                    $c = length $s;
                }
                print LOG $s or die "Can't print: $!";
            }
        }
    }
    close GSOUT or die "Can't close: $!";
    unless (defined $quiet) {
        print LOG "\n" or die "Can't print: $!";
    }

    # No zombie processes
    waitpid $pid, 0;
    die "Can't run `" . printcmd(@cmd) . "'" if $? >> 8;

    die "Error when calculating bounding box"
        if ($x0 >= $x1 || $y0 >= $y1);
    @bbox = ($x0, $y0, $x1, $y1);

    # Let's go back to the beginning of the input
    seek(IN, 0, 0) or die "$!";
}
elsif (defined $inwidth and defined $inheight) {
    @bbox = (0, 0, $inwidth, $inheight);
}
else {
    # Guess page size from the input file

    # To avoid to seek into IN, it gonna be copied from WRITE to READ
    # in background, once the Bounding Box has been read
    pipe(*READ, *WRITE) or die "Can't pipe: $!";
    while (!@bbox && defined(my $line = <IN>)) {
        print WRITE $line or die "Can't print: $!";
        @bbox = ($1, $2, $3, $4)
            if ($line =~ m/^\%\%BoundingBox: (\d+) (\d+) (\d+) (\d+)/);
    }
    die "Cannot guess input page size!" unless @bbox;

    my $copier = fork;
    die "Can't fork: $!" unless defined $copier;
    if ($copier == 0) {
        # Child: cat IN > WRITE in background
        close READ or die "Can't close: $!";
        while (<IN>) {
            print WRITE $_ or die "Can't print: $!";
        }
        exit;
    }

    # Parent
    close WRITE or die "Can't close: $!";
    close IN    or die "Can't close: $!";
    open(*IN, "<&READ") or die "Can't dup: $!";
}


#
# PSBook
#

if (defined $book) {
    @cmd = ('psbook');
    push @cmd, '-q' if defined $quiet;

    my $pid = open2(*OUT, "<&IN", @cmd);
    open(*IN, "<&OUT") or die "Can't dup: $!";
    push @pids, [$pid, @cmd];
}


#
# PSNup
#

# Give the output sheet the same orientation (portrait/landscape) as the
# input pages
($outheight, $outwidth) = ($outwidth, $outheight)
    if (($bbox[2]-$bbox[0] > $bbox[3]-$bbox[1])
        xor ($outwidth-2*$margin > $outheight-2*$margin));

my ($horiz, $vert, $rotate, $scale, $hshift, $vshift) =
    calc_layout($nup, $border, \@bbox,
                $outwidth-2*$margin, $outheight-2*$margin);
my @specs =
    calc_specs($horiz, $vert, $rotate, $scale,
               [$outwidth-2*$margin, $outheight-2*$margin, $hshift, $vshift]);

my $pagespecs = "$nup:" . join('+', @specs);
@cmd = ('pstops', '-w', $bbox[2], '-h', $bbox[3], $pagespecs);
push @cmd, '-q' if defined $quiet;

my $pid = open2(*OUT, "<&IN", @cmd);
open(*IN, "<&OUT") or die "Can't dup: $!";
push @pids, [$pid, @cmd];


#
# Final file: Convert back to PDF
#

# NOTE(review): this stage runs whatever $filetype is, so a PostScript
# input also ends up as PDF, although the manual says the output format
# follows the input -- confirm which one is intended.
@cmd = ('gs', "-sDEVICE=pdfwrite", "-sOutputFile=%stdout%",
        "-dBATCH", "-dNOPAUSE", "-dAutoRotatePages=/None",
        "-c", "<< /Orientation $rotate /PageSize [$outwidth $outheight] >> setpagedevice",
        "-f", "-");

$pid = open2(">&FOUT", "<&IN", @cmd);
push @pids, [$pid, @cmd];


# Avoid zombies
for my $job (@pids) {
    my ($jobpid, @jobcmd) = @$job;
    my $reaped = waitpid($jobpid, 0);
    my $status = $?;
    warn "Can't run `" . printcmd(@jobcmd) . "'"
        if ($reaped != -1 and $status >> 8);
}

for my $handle (*READ, *FIN, *FOUT) {
    close $handle or die "Can't close: $!";
}

# Delete auxiliary files
unlink @auxfiles;

# Useless, but Perl doesn't see that this filehandle is used more than
# one time (and even automatically closed by `open3')
exit;
close GSIN;


# =========================================================

#
# Finding the best layout is an optimisation problem. We try all of the
# combinations of width*height in both normal and rotated form, and
# minimise the wasted space.
#
# calc_layout ($nup, $border, \@bbox, $outwidth, $outheight)
#
# Choose how $nup logical pages, each with bounding box
# (x0, y0, x1, y1) = @bbox, are laid out on a sheet of
# $outwidth x $outheight points.
#
# Returns ($horiz, $vert, $rotate, $scale, $hshift, $vshift): the number
# of pages across and down, a rotation flag (0, or 3 when the pages are
# rotated), the scale factor, and the horizontal and vertical shift that
# centres a page inside its cell.
#
# Dies if the bounding box is empty, or if no layout wastes less than the
# tolerance.
#
sub calc_layout {
    my ($nup, $border, $bbox, $outwidth, $outheight) = @_;

    # $bbox is an array reference: read it through the reference rather
    # than through whatever @bbox happens to be in scope.
    my ($x0, $y0, $x1, $y1) = @{$bbox};
    my ($inwidth, $inheight) = ($x1 - $x0, $y1 - $y0);

    # Both are used as divisors below
    die "Invalid input bounding box"
        unless $inwidth > 0 && $inheight > 0;
    die "Can't find acceptable layout for $nup-up" unless $nup >= 1;

    my ($horiz, $vert, $rotate, $scale, $hshift, $vshift);
    my $tolerance = 100000;    # layout tolerance
    my $best      = $tolerance;

    # $hor runs over the divisors of $nup; nextdiv returns 0 after the last
    for (my $hor = 1; $hor; $hor = nextdiv($hor, $nup)) {
        my $ver = $nup / $hor;

        # try normal orientation first
        my $scl = min($outheight / ($inheight * $ver),
                      $outwidth  / ($inwidth  * $hor));
        my $optim = ($outwidth  - $scl * $inwidth  * $hor) ** 2
                  + ($outheight - $scl * $inheight * $ver) ** 2;
        if ($optim < $best) {
            $best = $optim;
            # recalculate scale to allow for internal borders
            $scale = min(($outheight - 2*$border*$ver) / ($inheight * $ver),
                         ($outwidth  - 2*$border*$hor) / ($inwidth  * $hor));
            $hshift = ($outwidth  / $hor - ($x1 + $x0) * $scale) / 2;
            $vshift = ($outheight / $ver - ($y1 + $y0) * $scale) / 2;
            ($horiz, $vert) = ($hor, $ver);
            $rotate = 0;
        }

        # try rotated orientation
        $scl = min($outheight / ($inwidth  * $hor),
                   $outwidth  / ($inheight * $ver));
        $optim = ($outheight - $scl * $inwidth  * $hor) ** 2
               + ($outwidth  - $scl * $inheight * $ver) ** 2;
        if ($optim < $best) {
            $best = $optim;
            # recalculate scale to allow for internal borders
            $scale = min(($outheight - 2*$border*$hor) / ($inwidth  * $hor),
                         ($outwidth  - 2*$border*$ver) / ($inheight * $ver));
            $hshift = ($outwidth  / $ver - ($y1 + $y0) * $scale) / 2;
            $vshift = ($outheight / $hor - ($x1 + $x0) * $scale) / 2;
            ($horiz, $vert) = ($ver, $hor);
            $rotate = 3;
        }
    }

    # fail if nothing better than worst tolerance was found
    die "Can't find acceptable layout for $nup-up" if $best == $tolerance;

    return ($horiz, $vert, $rotate, $scale, $hshift, $vshift);
}


#
# Construct pstops specification list
#
# calc_specs ($horiz, $vert, $rotate, $scale,
#             [$outwidth, $outheight, $hshift, $vshift])
#
# Returns one pstops page specification per cell of the $horiz x $vert
# grid. Reads the file-level $column (page ordering) and $margin (added
# to every offset).
#
sub calc_specs {
    my ($horiz, $vert, $rotate, $scale, $bbox) = @_;
    my ($outwidth, $outheight, $hshift, $vshift) = @$bbox;

    my @specs;
    for my $pageno (0 .. $horiz*$vert - 1) {
        my ($up, $across);    # position of the page in the grid
        my $rot = "";
        my ($xoff, $yoff);

        if ($rotate) {
            if (defined $column) {
                # column=0; leftright=1; topbottom=0;
                $across = $pageno % $horiz;
                $up     = floor($pageno / $horiz);
            }
            else {
                # column=1; leftright=1; topbottom=0;
                $across = floor($pageno / $vert);
                $up     = $pageno % $vert;
            }
            $rot  = 'L';
            $xoff = $margin + ($across+1)*$outwidth/$horiz - $hshift;
        }
        else {
            if (defined $column) {
                # column=1; leftright=1; topbottom=1;
                $across = floor($pageno / $vert);
                $up     = $vert-1 - floor($pageno % $vert);
            }
            else {
                # column=0; leftright=1; topbottom=1;
                $across = $pageno % $horiz;
                $up     = $vert-1 - floor($pageno / $horiz);
            }
            $xoff = $margin + $across*$outwidth/$horiz + $hshift;
        }
        $yoff = $margin + $up*$outheight/$vert + $vshift;

        push @specs, sprintf('%d%s@%.3f(%.3f,%.3f)',
                             $pageno, $rot, $scale, $xoff, $yoff);
    }

    return @specs;
}


#
# In-place convert the given length to PostScript points
#
# topoints (\$length)
#
# $length is a number with an optional unit (pt, in, cm or mm; default
# pt). It is replaced by the rounded number of points. An undefined
# length is left alone; an unparsable one or an unknown unit is fatal.
#
sub topoints {
    my ($length) = @_;
    return unless defined $$length;

    $$length =~ /^([+-]?\d*\.?\d+)(\w*)$/
        or die "Unable to parse `$$length'";
    my ($points, $unit) = ($1, $2);

    if ($unit eq "" or $unit eq "pt") {
        # nothing
    }
    elsif ($unit eq "in") {
        $points *= 72;
    }
    elsif ($unit eq "cm") {
        $points *= 72/2.54;
    }
    elsif ($unit eq "mm") {
        $points *= 72/25.4;
    }
    else {
        die "Unknown unit: `$unit'";
    }

    $$length = floor($points + .5);
    return;
}


#
# In-place set the given width and height to the predefined papersize
#
# papersize ($name, \$width, \$height)
#
# $name is case-insensitive; an unknown name is fatal.
#
sub papersize {
    my ($p, $w, $h) = @_;
    $p = lc $p;

    # Sizes in PostScript points. The table lives inside the sub because
    # the subs sit after the script's `exit': a file-level initialisation
    # placed here would never run.
    my %size_of = (
        "a0"        => [2384, 3370],    # 84.1cm * 118.9cm
        "a1"        => [1684, 2384],    # 59.4cm * 84.1cm
        "a2"        => [1191, 1684],    # 42cm * 59.4cm
        "a3"        => [ 842, 1191],    # 29.7cm * 42cm
        "a4"        => [ 595,  842],    # 21cm * 29.7cm
        "a5"        => [ 421,  595],    # 14.85cm * 21cm
        "b5"        => [ 516,  729],    # 18.2cm * 25.72cm
        "letter"    => [ 612,  792],    # 8.5in * 11in
        "legal"     => [ 612, 1008],    # 8.5in * 14in
        "ledger"    => [1224,  792],    # 17in * 11in
        "tabloid"   => [ 792, 1224],    # 11in * 17in
        "statement" => [ 396,  612],    # 5.5in * 8.5in
        "executive" => [ 540,  720],    # 7.5in * 10in
        "folio"     => [ 612,  936],    # 8.5in * 13in
        "quarto"    => [ 610,  780],    # 8.5in * 10.83in
        "10x14"     => [ 720, 1008],    # 10in * 14in
    );

    my $size = $size_of{$p}
        or die "Unknown paper size: `$p'";
    ($$w, $$h) = @$size;
    return;
}


#
# Print a command just like you'd do in a shell
#
# printcmd (@cmd)
#
# Returns the words of @cmd joined by spaces, for use in messages only:
# every double quote is escaped, and a word that is empty or contains a
# blank or a shell metacharacter is wrapped in double quotes.
#
sub printcmd {
    my @words;
    for my $arg (@_) {
        my $word = $arg;
        $word =~ s/"/\\"/g;    # all of them, not just the first one
        $word = "\"$word\""
            if $word eq "" or $word =~ /[ ()';#{}*?~&|`]/;
        push @words, $word;
    }
    return join ' ', @words;
}


#
# nextdiv ($n, $m): the smallest divisor of $m greater than $n, or 0 if
# there is none
#
sub nextdiv {
    my ($n, $m) = @_;
    while (++$n <= $m) {
        return $n if $m % $n == 0;
    }
    return 0;
}


#
# min ($n, $m): the smaller of two numbers
#
sub min {
    my ($n, $m) = @_;
    return $n < $m ? $n : $m;
}


#
# round ($x): $x rounded to the nearest integer, halves going up
#
sub round {
    return floor($_[0] + .5);
}