#!/usr/bin/perl

use strict;
use warnings;

use File::Spec;
use File::Temp qw(tempfile);
use Getopt::Long qw(:config posix_default no_ignore_case gnu_compat bundling auto_version auto_help);
use IPC::Open2;
use IPC::Open3;
use POSIX qw(floor);
use Pod::Usage;

=head1 NAME

pdftool.pl - a PDF swiss army knife

=head1 SYNOPSIS

B<pdftool.pl> [-s I<pages>] [-w I<width>] [-h I<height>] [-p I<paper>]
[-W I<width>] [-H I<height>] [-P I<paper>] [-m I<margin>] [-c] [-b]
[-n I<nup>] [-q] [I<infile> [I<outfile>]]

=head1 DESCRIPTION

I<pdftool.pl> combines the tools in the PSUtils bundle in a nice way.

The input should be either a Portable Document Format (PDF) file, or a
PostScript file.  The output is always a PDF file.

If no input file is given, or if a single hyphen-minus (B<->) is given
as file name, I<pdftool.pl> will read the PDF or PostScript data from
the standard input.  In that case, and if the input data is in PDF
format, an auxiliary file will be created (since the conversion from PDF
to PS requires random access to the data), and removed afterwards.
Another auxiliary file may be created when the crop option (B<-c>) is
set, since the bounding box calculation needs to read the data twice; it
is removed afterwards as well.

If no output file is given, or if a single hyphen-minus (B<->) is given
as file name, I<pdftool.pl> will send the PDF data to the standard
output.

The document will be treated as follows:

=over 8

=item Conversion from PDF to PostScript (if necessary),

=item Selection of the page range,

=item Resizing to the given papersize, adding a margin, and cropping,

=item Rearranging pages for printing books or booklets,

=item Putting several pages per sheet,

=item Conversion from PostScript to PDF.

=back

=head1 OPTIONS

=over 8

=item B<-s, --select> I<pages>

Specifies the pages which are to be selected.  I<pages> is a comma
separated list of page ranges, each of which may be a page number, or a
page range of the form I<first>-I<last>.  If I<first> is omitted, the
first page is assumed, and if I<last> is omitted, the last page is
assumed.  The prefix character `_' indicates that the page number is
relative to the end of the document, counting backwards.  If just this
character with no page number is used, a blank page will be inserted.

=item B<-w, --width> I<width>

Specify the width of the output file.  If the height is not specified as
well, it will be ignored.  The known units are B<pt>, B<in>, B<cm> and
B<mm>.  The default unit is B<pt>.

=item B<-h, --height> I<height>

Specify the height of the output file.  If the width is not specified as
well, it will be ignored.  The known units are B<pt>, B<in>, B<cm> and
B<mm>.  The default unit is B<pt>.

=item B<-p, --paper> I<paper>

Specify the paper size of the output file, as an alternative to B<-w>
and B<-h>.  Can be set to B<a0>, B<a1>, B<a2>, B<a3>, B<a4>, B<a5>,
B<letter>, B<legal>, B<tabloid>, B<statement>, B<executive>, B<folio>,
B<quarto>, or B<10x14>.  The default output paper size is B<a4>.

=item B<-W, --Width> I<width>

Same as the option B<-w>, but for the input file.

=item B<-H, --Height> I<height>

Same as the option B<-h>, but for the input file.

=item B<-P, --Paper> I<paper>

Same as the option B<-p>, but for the input file.  By default,
I<pdftool.pl> will try to guess this value from the header of the file,
and fail if the information is missing.  This option is useless if the
crop option (B<-c>) is set.

=item B<-m, --margin> I<margin>

Add a margin to the output file.  Possible units are B<pt>, B<in>,
B<cm> and B<mm>.  The default unit is B<pt>.  The default margin is
B<1cm> if the crop option (B<-c>) is set, and B<0> otherwise.

If the nup option (B<-n>I<nup>) is set to more than one page per sheet,
and if the booklet option (B<-b>) is not set, the margin will be the
same between two logical pages, and between a logical page and the sheet
border.  Otherwise, the margin between two logical pages will be twice
as large as the one between a logical page and the sheet border.

=item B<-c, --crop>

If this option is set, I<pdftool.pl> will interpret the PostScript code
to calculate the maximal effective bounding box.  This operation may be
quite demanding for the CPU.

=item B<-b, --book>

Rearrange pages for printing books or booklets.

=item B<-n, --nup> I<nup>

Puts multiple logical pages (has to be a power of two) onto each
physical sheet of paper.  The inner margin might be the same as the
outer one (depending on the booklet option B<-b>), see B<-m> for
details.  If I<nup> is less than 10, the option B<->I<nup> may be used
as an alternative.

=item B<-q, --quiet>

I<pdftool.pl> normally prints the page numbers of the pages output;
this option suppresses this.

=item B<--help>

Display a brief help.

=item B<--version>

Display the version number of the I<pdftool.pl> program.

=item B<--man>

Display the manual page.

=back

=head1 EXAMPLES

The following command can be used to remotely crop a PDF file, convert
it to A4 paper, and rearrange the pages for printing a booklet:

    ssh remote pdftool.pl -cpA4 -b2 < in.pdf > out.pdf

=head1 REQUIRE

Requires PSUtils (B<psselect>, B<pstops>, B<psbook>, B<psnup>) installed
and available in the command line
http://www.tardis.ed.ac.uk/~ajcd/psutils/.  Ghostscript (B<gs>) and
B<pdftops> are required as well.

=head1 AUTHOR

Public domain, (c) Guilhem Moulin.

=head1 VERSION

Version: 0.2, 12 December 2010

=cut

# TODO: inline it in the header
# Read by Getopt::Long's auto_version, hence a package variable.
our $VERSION = "0.2, 12 December 2010";

# Directory in which auxiliary files are created.
my $tmpdir = '/tmp';

# Rotation flag handed to pstops when the page has to be turned
# ('L' = left, anything else is treated as right).
my $rotdir = 'L';

# Known paper names => [width, height], in units understood by topoints().
my %PAPER_SIZE = (
    'a0'        => [ "841mm",  "1189mm" ],
    'a1'        => [ "594mm",  "841mm"  ],
    'a2'        => [ "420mm",  "594mm"  ],
    'a3'        => [ "297mm",  "420mm"  ],
    'a4'        => [ "210mm",  "297mm"  ],
    'a5'        => [ "148mm",  "210mm"  ],
    'letter'    => [ "8.5in",  "11in"   ],
    'legal'     => [ "8.5in",  "14in"   ],
    'tabloid'   => [ "11in",   "17in"   ],
    'statement' => [ "5.5in",  "8.5in"  ],
    'executive' => [ "7.25in", "10.5in" ],
    'folio'     => [ "8.27in", "13in"   ],
    'quarto'    => [ "9in",    "11in"   ],
    '10x14'     => [ "10in",   "14in"   ],
);

# Run the program only when executed as a script, so that the helper
# subroutines below can be loaded (e.g. with `require') on their own.
exit main() unless caller;

#
# The whole pipeline.  Returns the exit status of the program: 0 on
# success, 1 if one of the external commands failed.
#
# The pipeline is plumbed with bareword filehandles on purpose: open2 and
# open3 are given "<&NAME" / ">&NAME" strings, which they resolve by name.
#
sub main {

    #
    # Options & arguments
    #
    my ($select, $outwidth, $outheight, $inwidth, $inheight);
    my ($margin, $crop, $book, $quiet, $man);
    my $nup = 1;

    # TODO: choose the output type
    GetOptions(
        "select|s=s" => \$select,
        "w|width=s"  => \$outwidth,
        "h|height=s" => \$outheight,
        "p|paper=s"  => sub { papersize($_[1], \$outwidth, \$outheight) },
        "W|Width=s"  => \$inwidth,
        "H|Height=s" => \$inheight,
        "P|Paper=s"  => sub { papersize($_[1], \$inwidth, \$inheight) },
        "margin|m=s" => \$margin,
        "crop|c"     => \$crop,
        "book|b"     => \$book,
        "nup|n=i"    => \$nup,
        # -1 .. -9 as shortcuts for -n1 .. -n9
        (map { my $n = $_; ($n => sub { $nup = $n }) } 1 .. 9),
        "q|quiet"    => \$quiet,
        "man"        => \$man,
    ) or pod2usage(2);

    pod2usage(2) if @ARGV > 2;
    pod2usage(-exitstatus => 0, -verbose => 2) if defined $man;

    # Input and output files
    my ($infile, $outfile) = @ARGV;

    #
    # Default values
    #
    $margin = (defined $crop ? "1cm" : 0) unless defined $margin;

    papersize("a4", \$outwidth, \$outheight)
        unless defined $outwidth and defined $outheight;

    # Default unit: PostScript point
    topoints($_) for (\$outwidth, \$outheight, \$inwidth, \$inheight, \$margin);

    # Margin applied when resizing ($mresize) and by psnup ($mnup).  Without
    # the booklet option, both halves add up to the same distance between
    # two logical pages and between a page and the sheet border.
    my ($mresize, $mnup) = ($margin, 0);
    if ($nup > 1 && !defined $book) {
        $mresize = $mnup = $margin / 2;
    }

    # TODO: would be nice to generalize $nup to any integer that psnup would
    # accept.
    my $nup_log2 = 0;
    $nup_log2++ while (1 << $nup_log2) < $nup;
    die "nup has to be a power of two" if (1 << $nup_log2) != $nup;

    #
    # Open input and output files
    #
    if (defined $infile && $infile ne "-") {
        open FIN, '<', $infile or die "Can't read `$infile': $!";
    }
    else {
        undef $infile;
        *FIN = *STDIN;
    }

    # After the pipe, it won't be detected as seekable
    my $inseek = seek(FIN, 0, 1) ? 1 : undef;

    if (defined $outfile && $outfile ne "-") {
        open FOUT, '>', $outfile or die "Can't create `$outfile': $!";
    }
    else {
        *FOUT = *STDOUT;
    }
    *LOG = *STDERR;

    #
    # Detect filetype
    #
    # TODO: read specification to detect filetype properly
    my $filetype = scan_and_replay(\*FIN, sub {
        my ($line) = @_;
        return "PS"  if $line =~ /^%!PS/;
        return "PDF" if $line =~ /^%PDF/;
        return;
    });
    die "Cannot recognise FileType" unless defined $filetype;

    # Children to wait for: [pid, command...]
    my @pids;
    my @quiet_flag = defined $quiet ? ('-q') : ();
    my @cmd;

    #
    # Conversion from PDF to PS, if necessary
    #
    if ($filetype eq "PDF") {
        unless (defined $infile && $inseek) {
            # Need to copy the whole input to an auxiliary file, since
            # conversion from PDF to PS requires random access to the data.
            # File::Temp picks an unpredictable name and removes the file
            # when this process exits, even after a die.
            my ($aux, $auxname) = tempfile("pdftool-stdin-XXXXXX",
                DIR => $tmpdir, SUFFIX => ".pdf", UNLINK => 1);
            copy_stream(\*FIN, $aux);
            close $aux or die "Can't write into `$auxname': $!";
            $infile = $auxname;
        }

        # Convert to PS only the pages we are interested in
        my ($first, $last);
        ($first, $last, $select) = restrict_page_range($select)
            if defined $select;

        # Options first, file names last
        @cmd = ('pdftops', '-origpagesizes', @quiet_flag);
        push @cmd, '-f', $first if defined $first;
        push @cmd, '-l', $last  if defined $last;
        push @cmd, $infile, '-';
        my $pid = open PSIN, "-|", @cmd
            or die "Can't run `" . printcmd(@cmd) . "'";
        push @pids, [$pid, @cmd];
    }
    else {
        open PSIN, '<&', \*FIN or die "Can't dup: $!";
    }

    # IN is always a plain dup: closing a handle created by a piped open
    # waits for the child, which must not happen while the pipeline is
    # being assembled (open2 closes IN in the parent).
    open IN, '<&', \*PSIN or die "Can't dup: $!";

    #
    # Select, if necessary
    #
    pipe_through(\@pids, 'psselect', @quiet_flag, "-p$select")
        if defined $select;

    #
    # Bounding box
    #
    my @bbox;
    if (defined $crop) {
        # Calculate the maximal bounding box
        unless (seek IN, 0, 1) {
            # The input is not seekable: have to create a seekable
            # auxiliary file
            my ($aux, $auxname) = tempfile("pdftool-crop-XXXXXX",
                DIR => $tmpdir, SUFFIX => ".ps", UNLINK => 1);
            copy_stream(\*IN, $aux);
            close $aux or die "Can't write into `$auxname': $!";
            close IN or die "Can't close: $!";
            open IN, '<', $auxname or die "Can't read `$auxname': $!";
        }
        @bbox = measure_bounding_box($quiet);

        # Let's go back to the beginning of the input
        seek IN, 0, 0 or die "Can't rewind: $!";
    }
    elsif (defined $inwidth and defined $inheight) {
        @bbox = (0, 0, $inwidth, $inheight);
    }
    else {
        # Guess page size from the input file
        my $box = scan_and_replay(\*IN, sub {
            my ($line) = @_;
            return [$1, $2, $3, $4]
                if $line =~ /^%%BoundingBox: (-?\d+) (-?\d+) (-?\d+) (-?\d+)/;
            return;
        });
        die "Cannot guess input page size!" unless defined $box;
        @bbox = @$box;
    }

    my ($inw, $inh) = ($bbox[2] - $bbox[0], $bbox[3] - $bbox[1]);
    die "Error when calculating bounding box" if $inw <= 0 || $inh <= 0;

    #
    # Calculate PStoPS specification
    #
    # Give the sheet the orientation of the input page, so that no rotation
    # is ever required.
    if (($outwidth > $outheight) xor ($inw > $inh)) {
        ($outwidth, $outheight) = ($outheight, $outwidth);
    }

    my ($x0, $x1) = calculate_coordinates($outwidth,  $mresize);
    my ($y0, $y1) = calculate_coordinates($outheight, $mresize);
    die "Margin too large for the output paper size"
        if $x1 <= $x0 || $y1 <= $y0;

    # "0" is the page number within the (one page long) block
    my $spec = "0" . calc_pstops_page(@bbox, $x0, $y0, $x1, $y1);

    #
    # Run the program and filter the output
    #
    pipe_through(\@pids, 'pstops', @quiet_flag,
        "-w$outwidth", "-h$outheight", $spec);

    #
    # PSBook
    #
    pipe_through(\@pids, 'psbook', @quiet_flag) if defined $book;

    #
    # PSNup
    #
    if ($nup > 1) {
        my ($pagewidth, $pageheight) = ($outwidth, $outheight);
        # Each doubling of the number of pages turns the sheet by 90 degrees
        if ($nup_log2 % 2) {
            ($outwidth, $outheight) = ($outheight, $outwidth);
        }
        pipe_through(\@pids, 'psnup', @quiet_flag,
            "-W$pagewidth", "-H$pageheight",
            "-w$outwidth",  "-h$outheight",
            "-m$mnup", "-$nup");
    }

    #
    # Final file: Convert back to PDF
    #
    # -q keeps Ghostscript's own messages out of the PDF data, which goes
    # to its standard output.
    @cmd = ('gs', '-q', "-sDEVICE=pdfwrite", "-sOutputFile=%stdout%",
        "-dBATCH", "-dNOPAUSE", "-dAutoRotatePages=/None",
        "-c", "<< /Orientation 0 /PageSize [$outwidth $outheight] >> setpagedevice",
        "-f", "-");
    my $pid = open2(">&FOUT", "<&IN", @cmd);
    push @pids, [$pid, @cmd];

    # Avoid zombies, and report the commands that failed
    my $failed = 0;
    for my $job (@pids) {
        my ($jobpid, @jobcmd) = @$job;
        next if waitpid($jobpid, 0) == -1;
        if ($? != 0) {
            warn "Can't run `" . printcmd(@jobcmd) . "'";
            $failed = 1;
        }
    }

    for my $fh (\*FIN, \*FOUT) {
        close $fh or die "Can't close: $!";
    }

    return $failed;
}

# =========================================================

#
# Copy everything that is left to read on $in to $out (both filehandles
# or glob references).  Dies if reading or printing fails.
#
sub copy_stream {
    my ($in, $out) = @_;
    my $buffer;
    while (1) {
        my $count = read $in, $buffer, 65536;
        die "Can't read: $!" unless defined $count;
        last if $count == 0;
        print {$out} $buffer or die "Can't print: $!";
    }
    return;
}

#
# Read lines from $in (a glob reference) until $probe->($line) returns a
# defined value, and return that value; return undef if the end of the
# data is reached first.
#
# Since $in may not be seekable, on success it is reopened onto a pipe
# fed by a background process, which replays the lines consumed so far
# followed by the rest of the data.  The consumed lines are kept in
# memory until the fork, so nothing is written to a pipe nobody reads yet.
#
sub scan_and_replay {
    my ($in, $probe) = @_;

    my (@head, $found);
    while (defined(my $line = readline $in)) {
        push @head, $line;
        $found = $probe->($line);
        last if defined $found;
    }
    return unless defined $found;

    pipe(my $reader, my $writer) or die "Can't pipe: $!";
    my $pid = fork;
    die "Can't fork: $!" unless defined $pid;

    if ($pid == 0) {
        # Child: cat > $writer in background
        close $reader or die "Can't close: $!";
        print {$writer} @head or die "Can't print: $!";
        copy_stream($in, $writer);
        close $writer or die "Can't close: $!";
        exit 0;
    }

    # Parent
    close $writer or die "Can't close: $!";
    close $in     or die "Can't close: $!";
    open $in, '<&', $reader or die "Can't dup: $!";
    close $reader or die "Can't close: $!";
    return $found;
}

#
# Append @cmd to the pipeline: the command reads from IN, and IN is
# reopened onto its output.  [pid, @cmd] is pushed onto @$pids, so that
# the caller can wait for it.  The standard error of the command is the
# one of this process.
#
sub pipe_through {
    my ($pids, @cmd) = @_;
    my $pid = open2(*OUT, "<&IN", @cmd);
    open IN, '<&', \*OUT or die "Can't dup: $!";
    push @$pids, [$pid, @cmd];
    return;
}

#
# Run Ghostscript's bbox device on the (seekable) data behind IN, and
# return the smallest box (x0, y0, x1, y1) containing the marks of every
# page.  Unless $quiet is defined, the page numbers are printed on LOG
# while they are processed.  IN is read up to its end; rewinding it is up
# to the caller.
#
sub measure_bounding_box {
    my ($quiet) = @_;

    # Need to duplicate IN, since it will be closed in the parent process
    open GSIN, '<&', \*IN or die "Can't dup: $!";
    # The boxes are reported on the standard error; the standard output
    # carries nothing of interest.
    my $devnull = File::Spec->devnull;
    open GSNULL, '>', $devnull or die "Can't write into `$devnull': $!";

    my @cmd = ('gs', '-sDEVICE=bbox', '-dBATCH', '-dNOPAUSE', '-');
    my $pid = open3("<&GSIN", ">&GSNULL", *GSOUT, @cmd);
    close GSIN;    # open3 already closed it; a second mention keeps -w quiet
    close GSNULL or die "Can't close: $!";

    my ($page, $column) = (0, 0);    # Page & character counter
    my ($x0, $y0, $x1, $y1);
    while (defined(my $line = <GSOUT>)) {
        next unless $line =~ /^%%BoundingBox: (-?\d+) (-?\d+) (-?\d+) (-?\d+)/;
        my ($px0, $py0, $px1, $py1) = ($1, $2, $3, $4);

        # A blank page has an empty box, which says nothing about where
        # the marks of the other pages are.
        if ($px0 < $px1 && $py0 < $py1) {
            $x0 = $px0 if !defined $x0 || $px0 < $x0;
            $y0 = $py0 if !defined $y0 || $py0 < $y0;
            $x1 = $px1 if !defined $x1 || $px1 > $x1;
            $y1 = $py1 if !defined $y1 || $py1 > $y1;
        }

        unless (defined $quiet) {
            my $tag = "[" . ++$page . "] ";
            $column += length $tag;
            if ($column >= 80) {
                print LOG "\n" or die "Can't print: $!";
                $column = length $tag;
            }
            print LOG $tag or die "Can't print: $!";
        }
    }
    close GSOUT or die "Can't close: $!";
    unless (defined $quiet) {
        print LOG "\n" or die "Can't print: $!";
    }

    # No zombie processes
    waitpid $pid, 0;
    die "Can't run `" . printcmd(@cmd) . "'" if $? != 0;
    die "Error when calculating bounding box" unless defined $x0;

    return ($x0, $y0, $x1, $y1);
}

#
# Narrow down a psselect page list to the part of the document that is
# really needed.  Returns ($first, $last, $pages): the first and last
# page to extract from the document (undef when that side cannot be
# restricted), and the page list renumbered relatively to $first.
#
# Open-ended ranges ("N-", "-N") leave the matching side unrestricted, and
# pages counted from the end ("_N") both sides, since the number of pages
# is not known here.  A blank page ("_") refers to no page at all.
#
sub restrict_page_range {
    my ($select) = @_;
    my $number = qr/\A[1-9][0-9]*\z/;

    # Limit -1: keep the empty field after the hyphen of "N-"
    my @items = map { [ split /\s*-\s*/, $_, -1 ] } split /\s*,\s*/, $select;

    my ($min, $max);
    my ($open_below, $open_above) = (0, 0);
    for my $item (@items) {
        next if @$item == 1 && $item->[0] eq '_';
        for my $i (0 .. $#$item) {
            my $page = $item->[$i];
            if ($page =~ $number) {
                $min = $page if !defined $min || $page < $min;
                $max = $page if !defined $max || $page > $max;
            }
            elsif ($page eq '' && @$item == 2) {
                if ($i == 0) { $open_below = 1 }
                else         { $open_above = 1 }
            }
            else {
                # Relative to the end, or something left for psselect to
                # complain about
                $open_below = $open_above = 1;
            }
        }
    }

    my $first = (!$open_below && defined $min && $min > 1) ? $min : undef;
    my $last  = (!$open_above && defined $max)             ? $max : undef;

    if (defined $first) {
        for my $item (@items) {
            for my $page (@$item) {
                $page -= $first - 1 if $page =~ $number;
            }
        }
    }

    return ($first, $last, join ',', map { join '-', @$_ } @items);
}

#
# Calculate an item of the pstops specification: the rotation, scale and
# offset that fit the box (fx0, fy0, fx1, fy1) into the box (tx0, ty0,
# tx1, ty1), keeping the aspect ratio and centering the result.  The
# boxes must not be empty.
#
sub calc_pstops_page {
    my ($fx0, $fy0, $fx1, $fy1, $tx0, $ty0, $tx1, $ty1) = @_;

    # From and to width / height
    my ($wf, $hf) = ($fx1 - $fx0, $fy1 - $fy0);
    my ($wt, $ht) = ($tx1 - $tx0, $ty1 - $ty0);

    # Rotation is required when the orientations differ
    my $rotate = (($wf > $hf) xor ($wt > $ht));

    # Scale factor width / height; we take the smallest one
    my ($sw, $sh) = $rotate ? ($ht / $wf, $wt / $hf)
                            : ($wt / $wf, $ht / $hf);
    my $scale = $sw < $sh ? $sw : $sh;

    # Calculate the centers of the boxes
    my ($cxf, $cyf) = (.5 * ($fx0 + $fx1), .5 * ($fy0 + $fy1));
    my ($cxt, $cyt) = (.5 * ($tx0 + $tx1), .5 * ($ty0 + $ty1));

    # First, PStoPs scales, then rotates, then moves
    ($cxf, $cyf) = ($cxf * $scale, $cyf * $scale);
    my $direction = '';    # the configured $rotdir is left untouched
    if ($rotate) {
        $direction = $rotdir;
        ($cxf, $cyf) = $direction eq 'L' ? (-$cyf, $cxf) : ($cyf, -$cxf);
    }
    my ($movex, $movey) = ($cxt - $cxf, $cyt - $cyf);

    # Generate the summary
    return sprintf('%s@%.3f(%.3f,%.3f)', $direction, $scale, $movex, $movey);
}

#
# Calculate the beginning and ending coordinates, after shaving 2 times
# the margin off the given length.
#
sub calculate_coordinates {
    my ($length, $margin) = @_;
    my $end   = $length - $margin;
    my $inner = $end - $margin;
    return (round(round($end) - $inner), round($end));
}

#
# Round a float number to the nearest integer (halves go up).
#
sub round {
    return floor($_[0] + .5);
}

#
# In-place convert the given length (a reference to a number followed by
# an optional unit: pt, in, cm or mm) to PostScript points, rounded to an
# integer.  An undefined length is left alone.  Dies if the length cannot
# be parsed.
#
sub topoints {
    my ($length) = @_;
    return unless defined $$length;

    $$length =~ /^([+-]?\d*\.?\d+)(\w*)$/ or die "Unable to parse `$$length'";
    my ($value, $unit) = ($1, $2);

    if    ($unit eq "" or $unit eq "pt") { }    # nothing
    elsif ($unit eq "in")                { $value *= 72; }
    elsif ($unit eq "cm")                { $value *= 72 / 2.54; }
    elsif ($unit eq "mm")                { $value *= 72 / 25.4; }
    else                                 { die "Unknown unit: `$unit'"; }

    $$length = round($value);
    return;
}

#
# In-place set the given width and height (references) to the predefined
# papersize; the name is case-insensitive.  Dies on an unknown name.
#
sub papersize {
    my ($p, $w, $h) = @_;
    $p = lc $p;
    my $size = $PAPER_SIZE{$p} or die "Unknown paper size: `$p'";
    ($$w, $$h) = @$size;
    return;
}

#
# Print a command just like you'd do in a shell: the words are joined
# with spaces, and double-quoted when they contain anything a shell might
# interpret.
#
sub printcmd {
    my @words;
    for my $arg (@_) {
        my $word = $arg;
        if ($word eq '' || $word =~ m{[^\w\@%+=:,./-]}) {
            # Characters that stay special within double quotes
            $word =~ s/([\\"\$`])/\\$1/g;
            $word = "\"$word\"";
        }
        push @words, $word;
    }
    return join ' ', @words;
}

1;