#! /usr/bin/perl -w # This program is free software. It comes without any warranty, to the # extent permitted by applicable law. You can redistribute it and/or # modify it under the terms of the Do What The Fuck You Want To Public # License, Version 2, as published by Sam Hocevar. # See http://sam.zoy.org/wtfpl/COPYING for more details. $VERSION = "0.4, 12 May 2011"; use Getopt::Long qw(:config posix_default no_ignore_case gnu_compat bundling auto_version auto_help); use Pod::Usage; use IPC::Open2; use IPC::Open3; use POSIX qw(floor); use strict; =head1 NAME pdftool.pl - a PDF swiss army knife =head1 SYNOPSIS B [B<-w> I ] [B<-h> I] [B<-p> I] [B<-W> I] [B<-H> I] [B<-P> I] [B<-s> I] [B<-m> I] [B<-b> I] [B<-c>] [B<--book>] [B<--column>] [B<-n> I] [B<--screen>] [B<-q>] [I [I]] =head1 DESCRIPTION B combines the tools in the PSUtils bundle in a nice way. The input should be either a Portable Document Format (PDF) file, or a PostScript file. The output format is PDF only. If no input file is given, or if a single hyphen-minus (B<->) is given as file name, B will read the PDF or PostScript data from the standard input. In that case, and if the input data is in PDF format, an auxiliary file will be created (since the conversion from PDF to PS requires random access to the data), and removed afterwards. Also, if the crop option (B<-c>) is set, an auxiliary file will be created, and removed afterwards. If no output file is given, or if a single hyphen-minus (B<->) is given as file name, B will send the data to the standard output. By default, B rotates the pages in order to ensure that your pdf will be printable using your favorite duplex mode for portrait documents (Tumble if you prefer to turn the pages like those of a book). See the B<--screen> option to bypass this behavior. 

The document will be treated as follows:

=over 4

=item *

Convert from PDF to PostScript (if necessary, and only for the smallest
interval that contains all the selected pages),

=item *

Select the page range,

=item *

Calculate the minimal bounding box,

=item *

Rearrange pages for printing books or booklets,

=item *

Put multiple pages per sheet,

=item *

Convert back from PS to PDF.

=back

=head1 OPTIONS

=over 8

=item B<-s, --select=>I<pages>

Specify the pages which are to be selected. I<pages> is a comma separated
list of page ranges, each of which may be a page number, or a page range
of the form I<first>-I<last>. If I<first> is omitted, the first page is
assumed, and if I<last> is omitted, the last page is assumed. The prefix
character `_' indicates that the page number is relative to the end of
the document, counting backwards. If just this character with no page
number is used, a blank page will be inserted.

=item B<-w, --width=>I<length>

Specify the width of the output file. If the height is not specified as
well, it will be ignored. The known units are B<pt>, B<in>, B<cm> and
B<mm>. The default unit is B<pt>.

=item B<-h, --height=>I<length>

Specify the height of the output file. If the width is not specified as
well, it will be ignored. The known units are B<pt>, B<in>, B<cm> and
B<mm>. The default unit is B<pt>.

=item B<-p, --paper=>I<size>

Specify the paper size of the output file, as an alternative to B<-w> and
B<-h>. Can be set to B<a0>, B<a1>, B<a2>, B<a3>, B<a4>, B<a5>, B<b5>,
B<letter>, B<legal>, B<ledger>, B<tabloid>, B<statement>, B<executive>,
B<folio>, B<quarto>, or B<10x14>. The default output paper size is B<a4>.

=item B<-W, --Width=>I<length>

Same as the option B<-w>, but for the input file. This option is ignored
if the crop option (B<-c>) is set.

=item B<-H, --Height=>I<length>

Same as the option B<-h>, but for the input file. This option is ignored
if the crop option (B<-c>) is set.

=item B<-P, --Paper=>I<size>

Same as the option B<-p>, but for the input file. By default,
B<pdftool.pl> will try to guess this value from the header of the file,
and will fail if the information is missing. This option is ignored if
the crop option (B<-c>) is set.

=item B<-b, --border=>I<length>

Add a margin around each logical page on a sheet. Possible units are
B<pt>, B<in>, B<cm> and B<mm>. The default unit is B<pt>. The default
border is B<1cm> if the crop option (B<-c>) is set, and B<0> otherwise.

=item B<-m, --margin=>I<length>

Add a margin around the whole page. Possible units are B<pt>, B<in>,
B<cm> and B<mm>. The default unit is B<pt>. The default margin is B<0>.

=item B<-c, --crop>

If this option is set, the PostScript code will be interpreted to
calculate the maximal effective bounding box. This operation may take
time and be quite demanding for the CPU. See the note for the border
option (B<-b>) above.

=item B<--book>

Rearrange pages for printing books or booklets. If your "default" duplex
mode (see B<lpoptions>) is "NoTumble", you should either use lpr with the
option "Tumble" manually, or consider the B<pdftool.pl> option
B<--screen> instead.

=item B<-n, --nup=>I<n>

Put multiple logical pages onto each physical sheet of paper. The inner
margin might be the same as the outer one (depending on the border option
B<-b>), see B<-m> for details. If I<n> is less than 10, the option
B<->I<n> may be used as an alternative.

=item B<--screen>

By default, B<pdftool.pl> ensures that your pdf will be printable using
your "default" duplex mode (see B<lpoptions>). B<--screen> tries to make
the output PDF ready to read on your computer instead. It has no effect
for portrait documents.

=item B<--column>

Change the order to `column-major', where successive pages are placed in
columns down the paper.

=item B<-q, --quiet>

B<pdftool.pl> normally prints the page numbers of the pages output; this
option suppresses this.

=item B<--help>

Display a brief help.

=item B<--version>

Display the version number of the B<pdftool.pl> program.

=item B<--man>

Display the manual page.

=back

=head1 EXAMPLES

The following command can be used to remotely crop a PDF file, convert it
to A4 paper, and rearrange the pages to print a booklet:

C<< ssh remote pdftool.pl -cpA4 --book -2 -b2cm -m-1cm < in.pdf | \
    lpr -o Duplex=DuplexTumble >>

=head1 REQUIREMENTS

Requires PSUtils installed and available in the command line
http://www.tardis.ed.ac.uk/~ajcd/psutils/. The script also runs the
B<pdftops> and B<gs> commands.

=head1 AUTHOR

Copyright 2010-2011 Guilhem Moulin. See the source for copying conditions.

=cut

# Directory in which the auxiliary (seekable) copies of the input are made.
# NOTE(review): the auxiliary file names below are predictable
# ("pdftool-stdin-<pid>"); consider File::Temp if this runs on a shared host.
my $tmpdir = '/tmp';


#
# Options & arguments
#

my $select;
my ($outwidth, $outheight, $inwidth, $inheight);
my ($margin, $border);
my $crop;
my $book;
my $nup = 1;
my $column;
my $quiet;
my $man;
my $screen;

GetOptions(
    "select|s=s" => \$select,
    "w|width=s"  => \$outwidth,
    "h|height=s" => \$outheight,
    "p|paper=s"  => sub { papersize ($_[1], \$outwidth, \$outheight) },
    "W|Width=s"  => \$inwidth,
    "H|Height=s" => \$inheight,
    "P|Paper=s"  => sub { papersize ($_[1], \$inwidth, \$inheight) },
    "margin|m=s" => \$margin,
    "border|b=s" => \$border,
    "crop|c"     => \$crop,
    "book"       => \$book,
    "nup|n=i"    => \$nup,
    "screen"     => \$screen,
    # -1 .. -9 are shortcuts for --nup=1 .. --nup=9
    (map { my $n = $_; ("$n" => sub { $nup = $n }) } 1 .. 9),
    "column"     => \$column,
    "q|quiet"    => \$quiet,
    "man"        => \$man
) or pod2usage(2);

# At most two positional arguments: input file and output file
pod2usage(2) if @ARGV > 2;
pod2usage(-exitstatus => 0, -verbose => 2) if defined $man;

# Input and output files
my ($infile, $outfile) = @ARGV;


#
# Default values
#

# Default margin & border
$margin = 0 unless defined $margin;
unless (defined $border) {
    $border = defined $crop ? '1cm' : 0;
}

# Default output papersize
papersize ("a4", \$outwidth, \$outheight)
    unless (defined $outwidth and defined $outheight);

# Default unit: PostScript point
topoints ($_)
    for ( \$outwidth, \$outheight, \$inwidth, \$inheight, \$margin, \$border );

die "Margins are too big"
    if $outwidth <= $margin*2 or $outheight <= $margin*2;

# Open input and output files
my $infile_display;
if (defined $infile && $infile ne "-") {
    open FIN, '<', "$infile" or die "Can't read `$infile': $!";
    $infile_display = $infile;
}
else {
    undef $infile;
    *FIN = *STDIN;
    $infile_display = "(stdin)";
}
# After the pipe, it won't be detected as seekable
my $inseek = (seek FIN, 0, 1) ? 1 : undef;

if (defined $outfile && $outfile ne "-") {
    open FOUT, '>', "$outfile" or die "Can't create `$outfile': $!";
}
else {
    *FOUT = *STDOUT;
}

*LOG = *STDERR;


#
# Detect filetype
#

# To avoid to seek into FIN, it gonna be copied from WRITE to READ in
# background, once the filetype has been read
# TODO: read specification to detect filetype properly
my $filetype;
pipe *READ, *WRITE or die "Can't pipe: $!";
# Forward every line to the pipe until a line carrying a PS or PDF magic
# string has been seen.
while (!defined $filetype && defined (my $line = <FIN>)) {
    print WRITE $line or die "Can't print: $!";
    if ($line =~ /^%!PS/) {
        $filetype = "PS";
    }
    elsif ($line =~ /^%PDF/) {
        $filetype = "PDF";
    }
}
die "Cannot recognise FileType" unless defined $filetype;

my $copier = fork;
die "Can't fork: $!" unless defined $copier;
unless ($copier) {
    # Child: cat FIN > WRITE in background
    close READ or die "Can't close: $!";
    while (<FIN>) {
        print WRITE $_ or die "Can't print: $!";
    }
    exit;
}
# Parent: from now on FIN is the read end of the pipe fed by the child
close WRITE or die "Can't close: $!";
close FIN   or die "Can't close: $!";
open *FIN, "<&READ" or die "Can't dup: $!";


# Auxiliary files, to remove
my @auxfiles;

# Pids, to wait for; each entry is [$pid, @command]
my @pids;


#
# Conversion from PDF to PS, if necessary
#

my @cmd;
if ($filetype eq "PDF") {
    unless (defined $infile && $inseek) {
        # Need to copy the whole input to an auxiliary file, since
        # conversion from PDF to PS requires random access to the data
        $infile = "$tmpdir/pdftool-stdin-$$" . lc ".$filetype";
        open FINAUX, '>', "$infile" or die "Can't write into `$infile': $!";
        push @auxfiles, "$infile";

        # cat > $infile
        while (<FIN>) {
            print FINAUX $_ or die "Can't print: $!";
        }
        # Buffered write errors only show up when the handle is closed
        close FINAUX or die "Can't close: $!";
    }

    my ($first, $last);
    if (defined $select) {
        # Convert to PS only the pages we are interested in, and renumber
        # the page list relative to the converted slice
        ($first, $last, $select) = narrow_selection ($select);
    }

    # Convert to PS
    @cmd = ('pdftops', '-origpagesizes', "$infile", '-');
    push @cmd, '-f', $first if defined $first;
    push @cmd, '-l', $last  if defined $last;
    push @cmd, '-q' if defined $quiet;
    my $pid = open *PSIN, "-|", @cmd
        or die "Can't run `" . printcmd (@cmd) . "'";
    push @pids, [$pid, @cmd];
}
else {
    open *PSIN, "<&FIN" or die "Can't dup: $!";
}

open *IN, "<&PSIN" or die "Can't dup: $!";


#
# Select, if necessary
#

if (defined $select) {
    @cmd = ('psselect', "-p$select");
    push @cmd, '-q' if defined $quiet;
    my $pid = open3 "<&IN", *OUT, ">&LOG", @cmd;
    push @pids, [$pid, @cmd];
    open *IN, "<&OUT" or die "Can't dup: $!";
}


#
# Bounding box
#

my @bbox;
if (defined $crop) {
    # Calculate the maximal bounding box

    unless (seek IN, 0, 1) {
        # The input is not seekable: have to create a seekable auxiliary file
        my $auxfile = "$tmpdir/pdftool-stdin-$$.ps";
        open AUXFD, '>', "$auxfile" or die "Can't write into `$auxfile': $!";
        push @auxfiles, $auxfile;

        # cat > $auxfile
        while (<IN>) {
            print AUXFD $_ or die "Can't print: $!";
        }
        close AUXFD or die "Can't close: $!";
        close IN    or die "Can't close: $!";
        open IN, '<', "$auxfile" or die "Can't read `$auxfile': $!";
    }

    # Need to duplicate IN, since it will be closed in the parent process
    open *GSIN, '<&IN' or die "Can't dup: $!";

    @cmd = ('gs', '-sDEVICE=bbox', '-dBATCH', '-dNOPAUSE', '-');
    # NOTE(review): stdout is redirected onto GSOUT and GSOUT is also given
    # as the error handle, so both streams are read below -- confirm against
    # the IPC::Open3 documentation before touching this call.
    my $pid = open3 "<&GSIN", ">&GSOUT", *GSOUT, @cmd;

    my ($p, $c) = (0, 0); # Page & character counter
    # Start from an "empty" box and grow it with every page's box
    my ($x0, $y0, $x1, $y1) = (1<<16, 1<<16, -(1<<16), -(1<<16));
    while (<GSOUT>) {
        if ($_ =~ m/^\%\%BoundingBox: (\d+) (\d+) (\d+) (\d+)/) {
            $x0 = $1 if $1 < $x0;
            $y0 = $2 if $2 < $y0;
            $x1 = $3 if $3 > $x1;
            $y1 = $4 if $4 > $y1;
            unless (defined $quiet) {
                # Progress report "[1] [2] ...", wrapped at 80 columns
                my $s = "[" . ++$p . "] ";
                $c += length $s;
                if ($c >= 80) {
                    print LOG "\n" or die "Can't print: $!";
                    $c = length $s;
                }
                print LOG $s or die "Can't print: $!";
            }
        }
    }
    close GSOUT or die "Can't close: $!";
    unless (defined $quiet) {
        print LOG "\n" or die "Can't print: $!";
    }

    # No zombie processes
    waitpid $pid, 0;
    die "Can't run `" . printcmd (@cmd) . "'" if $? >> 8;

    die "Error when calculating bounding box"
        if ($x0 >= $x1 || $y0 >= $y1);
    @bbox = ($x0, $y0, $x1, $y1);

    # Let's go back to the beginning of the input
    seek IN, 0, 0 or die "$!";
}
elsif (defined $inwidth and defined $inheight) {
    @bbox = (0, 0, $inwidth, $inheight);
}
else {
    # Guess page size from the input file

    # To avoid to seek into IN, it gonna be copied from WRITE to READ
    # in background, once the Bounding Box has been read
    pipe *READ, *WRITE or die "Can't pipe: $!";
    while (!@bbox && defined (my $line = <IN>)) {
        print WRITE $line or die "Can't print: $!";
        @bbox = ($1, $2, $3, $4)
            if ($line =~ m/^\%\%BoundingBox: (\d+) (\d+) (\d+) (\d+)/);
    }
    die "Cannot guess input page size!" unless @bbox;

    my $copier = fork;
    die "Can't fork: $!" unless defined $copier;
    unless ($copier) {
        # Child: cat IN > WRITE in background
        close READ or die "Can't close: $!";
        while (<IN>) {
            print WRITE $_ or die "Can't print: $!";
        }
        exit;
    }
    # Parent
    close WRITE or die "Can't close: $!";
    close IN    or die "Can't close: $!";
    open *IN, "<&READ" or die "Can't dup: $!";
}


#
# PSBook
#

if (defined $book) {
    @cmd = ('psbook');
    push @cmd, '-q' if defined $quiet;
    my $pid = open3 "<&IN", *OUT, ">&LOG", @cmd;
    open *IN, "<&OUT" or die "Can't dup: $!";
    push @pids, [$pid, @cmd];
}


#
# PSNup (inlined here, to keep track of the possible rotation)
#

my $landscape;
{
    # Is the input page / the printable output area wider than tall?
    my $in_wide  = ($bbox[2]-$bbox[0] > $bbox[3]-$bbox[1]);
    my $out_wide = ($outwidth-2*$margin > $outheight-2*$margin);
    if (($in_wide and not $out_wide)
        or (defined $screen and not $in_wide and $out_wide)) {
        ($outheight, $outwidth) = ($outwidth, $outheight);
        $landscape = 1;
    }
}

my ($horiz, $vert, $rotate, $scale, $hshift, $vshift) =
    calc_layout ($nup, $border, \@bbox,
                 $outwidth-2*$margin, $outheight-2*$margin);
my ($ospecs, $especs) =
    calc_specs ($horiz, $vert, $rotate, $scale,
                [$outwidth-2*$margin, $outheight-2*$margin, $hshift, $vshift]);

my $land = ($outwidth > $outheight);
$land = not $land if ($rotate%2);
my $pagespecs;
if (defined $screen or not $land) {
    $pagespecs = "$nup:" . join ('+', @$ospecs);
}
else {
    # Odd and even sheets get different specs (the even ones use the
    # second list built by calc_specs)
    $pagespecs = 2*$nup . ':' . join ('+', @$ospecs) .
                          ',' . join ('+', @$especs);
}

@cmd = ('pstops', '-w', $bbox[2], '-h', $bbox[3], $pagespecs);
push @cmd, '-q' if defined $quiet;

my $pid = open3 "<&IN", *OUT, ">&LOG", @cmd;
open *IN, "<&OUT" or die "Can't dup: $!";
push @pids, [$pid, @cmd];


#
# Final file: Convert back to PDF
#

my $pagedevice;
if (defined $screen || $land || $landscape) {
    $rotate = ($rotate+1)%4 if not (defined $screen) and $land;
    $pagedevice = "/Orientation $rotate /PageSize [$outwidth $outheight]";
}
else {
    $pagedevice = "/PageSize [$outwidth $outheight]";
}

@cmd = ('gs', "-sDEVICE=pdfwrite", "-sOutputFile=%stdout%",
              "-dBATCH", "-dNOPAUSE", "-dAutoRotatePages=/None",
              "-c", "<< $pagedevice >> setpagedevice",
              "-f", "-");
$pid = open3 "<&IN", ">&FOUT", ">&LOG", @cmd;
push @pids, [$pid, @cmd];


# Avoid zombies
for my $job (@pids) {
    my ($pid, @cmd) = @$job;
    # print STDERR "PID: ", $pid, " Cmd: `", printcmd (@cmd), "'";
    my ($r, $v) = (waitpid ($pid, 0), $?);
    warn "Can't run `" . printcmd (@cmd) . "'"
        if ($r != -1 and $v >> 8);
}
for my $fh ( *READ, *FIN, *FOUT ) {
    close $fh or die "Can't close: $!";
}

# Delete auxiliary files
unlink @auxfiles;

# Useless, but Perl doesn't see that this filehandle is used more than
# one time (and even automatically closed by `open3')
exit;
close GSIN;


#
# narrow_selection ($select)
#
# Given a psselect page list, work out the smallest slice of the document
# that pdftops has to convert, and renumber the list relative to it.
#
# Returns ($first, $last, $newselect). $first (resp. $last) is undef when
# the slice has to start at the first page (resp. run to the last page).
#
sub narrow_selection {
    my ($select) = @_;
    my ($first, $last);            # smallest / largest absolute page seen
    my ($open_start, $open_end);   # slice must reach the first / last page
    my @items;

    for my $item (split / *, */, $select) {
        # The limit -1 keeps the trailing empty field of an open range
        # such as "5-", which a plain split would silently drop
        my @ends = split / *- */, $item, -1;
        if (@ends == 2) {
            $open_start = 1 if $ends[0] eq '';
            $open_end   = 1 if $ends[1] eq '';
        }
        elsif (@ends != 1) {
            # Malformed item: convert everything, let psselect complain
            ($open_start, $open_end) = (1, 1);
        }
        for my $page (@ends) {
            next if $page eq '';                   # open end, handled above
            next if $page eq '_' and @ends == 1;   # blank page, no constraint
            if ($page =~ /^[1-9]\d*$/) {
                $first = $page if !defined $first or $page < $first;
                $last  = $page if !defined $last  or $page > $last;
            }
            else {
                # `_N' counts from the end of the document (or the page is
                # malformed): its absolute position is unknown
                ($open_start, $open_end) = (1, 1);
            }
        }
        push @items, \@ends;
    }
    undef $first if $open_start;
    undef $last  if $open_end;

    # Absolute page numbers move down by the number of skipped pages;
    # everything else (open ends, `_', `_N') is left untouched
    my $skipped = defined $first ? $first - 1 : 0;
    my @newselect = map {
        join '-', map { /^[1-9]\d*$/ ? $_ - $skipped : $_ } @$_
    } @items;

    return ($first, $last, join (',', @newselect));
}


# =========================================================

#
# Finding the best layout is an optimisation problem. We try all of the
# combinations of width*height in both normal and rotated form, and
# minimise the wasted space.
#
# calc_layout ($nup, $border, \@bbox, $outwidth, $outheight)
#
# $nup is the number of logical pages per sheet, $border the margin kept
# around each of them, \@bbox the input bounding box (x0, y0, x1, y1) and
# $outwidth x $outheight the area available on the sheet.
#
# Returns ($horiz, $vert, $rotate, $scale, $hshift, $vshift): the grid
# size, the rotation (0, or 3 when the pages are turned), the scale factor
# and the shifts centering a page in its cell. Dies when no layout fits.
#
sub calc_layout {
    my ($nup, $border, $bbox, $outwidth, $outheight) = @_;
    # Use the box that was passed in, not whatever @bbox the file defines
    my ($x0, $y0, $x1, $y1) = @$bbox;
    my ($inwidth, $inheight) = ($x1 - $x0, $y1 - $y0);
    die "Invalid bounding box" if $inwidth <= 0 or $inheight <= 0;

    my ($horiz, $vert, $rotate, $scale, $hshift, $vshift);

    my $tolerance = 100000;    # layout tolerance
    my $best = $tolerance;
    # Walk through the divisors of $nup: $hor columns by $ver rows
    for (my $hor = 1; $hor; $hor = nextdiv ($hor, $nup)) {
        my $ver = $nup / $hor;

        # try normal orientation first
        my $scl = min ($outheight/($inheight*$ver), $outwidth/($inwidth*$hor));
        my $wasted_w = $outwidth  - $scl*$inwidth*$hor;
        my $wasted_h = $outheight - $scl*$inheight*$ver;
        my $optim = $wasted_w*$wasted_w + $wasted_h*$wasted_h;
        if ($optim < $best) {
            $best = $optim;
            # recalculate scale to allow for internal borders
            $scale = min (($outheight-2*$border*$ver)/($inheight*$ver),
                          ($outwidth-2*$border*$hor)/($inwidth*$hor));
            $hshift = ($outwidth/$hor  - ($x1+$x0)*$scale)/2;
            $vshift = ($outheight/$ver - ($y1+$y0)*$scale)/2;
            ($horiz, $vert) = ($hor, $ver);
            $rotate = 0;
        }

        # try rotated orientation
        $scl = min ($outheight/($inwidth*$hor), $outwidth/($inheight*$ver));
        $wasted_h = $outheight - $scl*$inwidth*$hor;
        $wasted_w = $outwidth  - $scl*$inheight*$ver;
        $optim = $wasted_h*$wasted_h + $wasted_w*$wasted_w;
        if ($optim < $best) {
            $best = $optim;
            # recalculate scale to allow for internal borders
            $scale = min (($outheight-2*$border*$hor)/($inwidth*$hor),
                          ($outwidth-2*$border*$ver)/($inheight*$ver));
            $hshift = ($outwidth/$ver  - ($y1+$y0)*$scale)/2;
            $vshift = ($outheight/$hor - ($x1+$x0)*$scale)/2;
            ($horiz, $vert) = ($ver, $hor);
            $rotate = 3;
        }
    }

    # fail if nothing better than worst tolerance was found
    die "Can't find acceptable layout for $nup-up" if $best == $tolerance;
    # a non-positive scale means the borders ate the whole cell
    die "Borders are too big for $nup-up" if $scale <= 0;

    return ($horiz, $vert, $rotate, $scale, $hshift, $vshift);
}


#
# Construct pstops specification list
#
# calc_specs ($horiz, $vert, $rotate, $scale,
#             [$outwidth, $outheight, $hshift, $vshift])
#
# Returns two array references: the page specifications for the odd sheets
# and the ones for the even sheets (the latter are numbered from
# $horiz*$vert and placed from the opposite corner).
# Reads the file-level $column and $margin variables.
#
sub calc_specs {
    my ($horiz, $vert, $rotate, $scale, $bbox) = @_;
    my ($outwidth, $outheight, $hshift, $vshift) = @$bbox;

    my $n = $horiz * $vert;
    # Single quotes: the `@' is literal and must never be interpolated
    my $format = '%d%s@%.3f(%.3f,%.3f)';
    my (@ospecs, @especs); # specs for odd and even pages
    for my $pageno (0 .. $n - 1) {
        my ($up, $across); # pageno index
        my ($orot, $erot) = ('', 'U');
        my $xoff;

        if ($rotate) {
            if (defined $column) {
                # column=0; leftright=1; topbottom=0;
                $across = $pageno % $horiz;
                $up     = floor ($pageno / $horiz);
            }
            else {
                # column=1; leftright=1; topbottom=0;
                $across = floor ($pageno / $vert);
                $up     = $pageno % $vert;
            }
            ($orot, $erot) = ('L', 'R');
            $xoff = ($across+1)*$outwidth/$horiz - $hshift;
        }
        else {
            if (defined $column) {
                # column=1; leftright=1; topbottom=1;
                $across = floor ($pageno / $vert);
                $up     = $vert-1 - $pageno % $vert;
            }
            else {
                # column=0; leftright=1; topbottom=1;
                $across = $pageno % $horiz;
                $up     = $vert-1 - floor ($pageno / $horiz);
            }
            $xoff = $across*$outwidth/$horiz + $hshift;
        }
        my $yoff = $up*$outheight/$vert + $vshift;

        push @ospecs, sprintf ($format, $pageno, $orot, $scale,
                               $xoff+$margin, $yoff+$margin);
        push @especs, sprintf ($format, $n + $pageno, $erot, $scale,
                               $outwidth-$xoff+$margin,
                               $outheight-$yoff+$margin);
    }
    return (\@ospecs, \@especs);
}


#
# In-place convert the given length to PostScript points
#
# topoints (\$length): $length is a number followed by an optional unit
# (pt, in, cm or mm; pt when missing). An undefined length is left alone;
# an unparsable one or an unknown unit is fatal.
#
sub topoints {
    my ($length) = @_;
    return unless defined $$length;

    $$length =~ /^([+-]?\d*\.?\d+)(\w*)$/ or die "Unable to parse `$$length'";
    my ($value, $unit) = ($1, $2);
    if ($unit eq "" or $unit eq "pt") {
        # nothing
    }
    elsif ($unit eq "in") {
        $value *= 72;
    }
    elsif ($unit eq "cm") {
        $value *= 72/2.54;
    }
    elsif ($unit eq "mm") {
        $value *= 72/25.4;
    }
    else {
        die "Unknown unit: `$unit'";
    }
    $$length = round ($value);
    return;
}


#
# In-place set the given width and height to the predefined papersize
#
# papersize ($name, \$width, \$height): $name is case insensitive; an
# unknown name is fatal. Sizes are in PostScript points.
#
sub papersize {
    my ($p, $w, $h) = @_;

    # The table lives inside the sub on purpose: this sub is called while
    # the options are parsed, before any file-level statement located
    # further down the file could have been executed.
    my %size_of = (
        a0        => [2384, 3370], # 84.1cm * 118.9cm
        a1        => [1684, 2384], # 59.4cm * 84.1cm
        a2        => [1191, 1684], # 42cm * 59.4cm
        a3        => [ 842, 1191], # 29.7cm * 42cm
        a4        => [ 595,  842], # 21cm * 29.7cm
        a5        => [ 421,  595], # 14.85cm * 21cm
        b5        => [ 516,  729], # 18.2cm * 25.72cm
        letter    => [ 612,  792], # 8.5in * 11in
        legal     => [ 612, 1008], # 8.5in * 14in
        ledger    => [1224,  792], # 17in * 11in
        tabloid   => [ 792, 1224], # 11in * 17in
        statement => [ 396,  612], # 5.5in * 8.5in
        executive => [ 540,  720], # 7.5in * 10in
        folio     => [ 612,  936], # 8.5in * 13in
        quarto    => [ 610,  780], # 8.5in * 10.83in
        '10x14'   => [ 720, 1008], # 10in * 14in
    );

    $p = lc $p;
    my $size = $size_of{$p} or die "Unknown paper size: `$p'";
    ($$w, $$h) = @$size;
    return;
}


#
# Print a command just like you'd do in a shell
#
# printcmd (@cmd) returns the words joined by spaces; every double quote
# is backslash-escaped, and a word holding a space or one of the listed
# shell metacharacters is wrapped in double quotes.
#
sub printcmd {
    my @words;
    for my $arg (@_) {
        (my $word = $arg) =~ s/"/\\"/g;    # /g: escape every quote
        $word = "\"$word\"" if $word =~ /[ ()';#{}*?~&|`]/;
        push @words, $word;
    }
    return join ' ', @words;
}


#
# nextdiv ($n, $m) returns the smallest divisor of $m that is greater
# than $n, or 0 when there is none.
#
sub nextdiv {
    my ($n, $m) = @_;
    while (++$n <= $m) {
        return $n if ($m % $n == 0);
    }
    return 0;
}


#
# min ($n, $m) returns the smaller of its two arguments.
#
sub min {
    my ($n, $m) = @_;
    return $n < $m ? $n : $m;
}


#
# round ($x) rounds to the nearest integer, halves going up.
#
sub round {
    return floor ($_[0] + .5);
}