#! /usr/bin/perl -w use Getopt::Long qw(:config posix_default no_ignore_case gnu_compat bundling auto_version auto_help); use Pod::Usage; use IPC::Open2; use POSIX qw(floor); use strict; =head1 NAME pdftool.pl - a PDF swiss army knife =head1 SYNOPSIS B [-s I] [-p I] [-m I] [-c] [-b] [-n I] [-q] [I [I]] =head1 DESCRIPTION I combines the tools in the PSUtils bundle in a nice way. The input should be either a Portable Document Format (PDF) file, or a PostScript file. The output format is the same as the input one. If no input file is given, or if a single hyphen-minus (B<->) is given as file name, I will read the PDF or PostScript data from the standard input. In that case, and if the input data is in PDF format, an auxiliary file will be created (since the conversion from PDF to PS requires random access to the data), and removed afterwards. An other auxiliary file may be created by I, see its man page for details. If no output file is given, or if a single hyphen-minus (B<->) is given as file name, I will send the data (of the same type as the input) to the standard output. The document will be treated as follows: =over 8 =item Conversion from PDF to PostScript (if necessary), =item Selection of the page range, =item Resizing to the given papersize, adding a margin, and croping, =item Rearranging pages for printing books or booklets, =item Putting several pages per sheets, =item Conversion from PS to PDF (if necessary). =back =head1 OPTIONS =over 8 =item B<-s, --select> Specifies the pages which are to be selected. I is a comma separated list of page ranges, each of which may be a page number, or a page range of the form I-I. If I is omitted, the first page is assumed, and if I is omitted, the last page is assumed. The prefix character `_' indicates that the page number is relative to the end of the document, counting backwards. If just this character with no page number is used, a blank page will be inserted. =item B<-p, --paper> Specify the paper size of the output file. Can be set to B, B, B, B, B, B, B, B, B, B, B, B, B, B, or B<10x14>. The default output paper size is B. =item B<-m, --margin> Add a margin to the output file. Possible units are B, B, B and B. The default unit is B. The default margin is B<1cm> if the crop option (B<-c>) is set, and B<0> otherwise. If the nup option (B<-n>I) is set to more than one page per sheet, and if the booklet option (B<-b>) is not set, the margin will be the same beetween two logical pages, and between a logical page and the sheet border. Otherwise, the margin beetween two logical pages will be twice bigger the one between a logical page and the sheet border. =item B<-c, --crop> If this option is set, I will interpret the PostScript code to calculate the maximal effective bounding box. This operation may be quite demanding for the CPU. =item B<-b, --book> Rearrange pages for printing books or booklets. =item B<-n, --nup> Puts multiple logical pages onto each physical sheet of paper. The inner margin might be same as the outer one (depending on the booklet option B<-b>), see B<-m> for details. If I is less than 10, the option B<->I may be used as an alternative. =item B<-q, --quiet> I normally prints the page numbers of the pages output; this option suppresses this. =item B<--help> Display a brief help. =item B<--version> Display the version number of the I program. =item B<--man> Display the manual page. =back =head1 EXAMPLES The following comand can be used to remotely crop a PDF file, convert it to A4 paper, and rearrange the pages for printing a booklet: ssh remote pdftool.pl -cpA4 -b2 < in.pdf > out.pdf =head1 REQUIRE Requires PSUtils installed and available in the command line http://www.tardis.ed.ac.uk/~ajcd/psutils/. PSResize2 is also required, mostly for the crop option B<-c>. =head1 AUTHOR Public domain, (c) Guilhem Moulin. =head1 VERSION Version: 0.1, 25 September 2010 =cut # TODO: inline it in the header $main::VERSION = "0.1, 25 September 2010"; my $tmpdir = '/tmp'; # # Options & arguments # my $select; my $paper; my $margin; my $crop; my $book; my $nup = 1; my $quiet; my $man; # TODO: choose the output type GetOptions( "select|s=s" => \$select, "paper|p=s" => \$paper, "margin|m=s" => \$margin, "crop|c" => \$crop, "book|b" => \$book, "nup|n=i" => \$nup, "1" => sub { $nup = 1 }, "2" => sub { $nup = 2 }, "3" => sub { $nup = 3 }, "4" => sub { $nup = 4 }, "5" => sub { $nup = 5 }, "6" => sub { $nup = 6 }, "7" => sub { $nup = 7 }, "8" => sub { $nup = 8 }, "9" => sub { $nup = 9 }, "q|quiet" => \$quiet, "man" => \$man ) or pod2usage(2); pod2usage(2) if ($#ARGV > 1); pod2usage(-exitstatus => 0, -verbose => 2) if defined $man; # Input and output files my ($infile, $outfile) = @ARGV; # # Default values # # Default output papersize $paper = "a4" unless defined $paper; # Default margin unless (defined $margin) { $margin = 0; $margin = "1cm" if defined $crop; } # Default unit: PostScript point &topoints( \$margin ); # Inner and outer margins my ($mresize, $mnup) = (0,0); if ($nup > 1 && not defined $book) { $mresize = $margin/2; $mnup = $mresize; } else { $mresize = $margin; } # Open input and output files my $infile_display; if (defined $infile && $infile ne "-") { open FIN, '<', "$infile" or die "Can't read `$infile': $!"; $infile_display = $infile; } else { undef $infile; *FIN = *STDIN; $infile_display = "(stdin)"; } # After the pipe, it won't be detected as seekable my $inseek = (seek FIN, 0, 1) ? 1 : undef; if (defined $outfile && $outfile ne "-") { open FOUT, '>', "$outfile" or die "Can't create `$outfile': $!"; } else { *FOUT = *STDOUT; } # # Detect filetype # # To avoid to seek into FIN, it gonna be copied from WRITE to READ in # background, once the filetype has been read # TODO: read specification to detect filetype properly my $filetype; pipe *READ, *WRITE or die "Can't pipe: $!"; while (not (defined $filetype) && defined (my $l = )) { print WRITE $l or die "Can't close: $!"; if (defined $l && $l =~ /^%!PS/) { $filetype = "PS"; } elsif (defined $l && $l =~ /^%PDF/) { $filetype = "PDF"; } } die "Cannot recognise FileType" unless defined $filetype; unless (my $pid = fork) { # Child: cat FIN > WRITE in background die "Can't fork: $!" unless defined $pid; close READ or die "Can't close: $!"; while () { print WRITE or die "Can't print: $!"; } exit; } # Parent close WRITE or die "Can't close: $!"; close FIN or die "Can't close: $!"; open *FIN, "<&READ" or die "Can't dup: $!"; # Auxiliary files, to remove my @auxfiles; # Pids, to waid for my @pids; # # Conversion from PDF to PS, if necessary # my @cmd; if ($filetype eq "PDF") { unless (defined $infile && $inseek) { # Need to copy the whole input to an auxiliary file, since # conversion from PDF to PS requires random access to the data $infile = "$tmpdir/pdftool-stdin-$$" . lc ".$filetype"; open FINAUX, '>', "$infile" or die "Can't write into `$infile': $!"; push @auxfiles, "$infile"; # cat > $infile while () { print FINAUX or die "Can't print: $!"; } close FINAUX; } # Convert to PS @cmd = ('pdftops', "$infile", '-'); push @cmd, '-q' if defined $quiet; my $pid = open PSIN, "-|", @cmd or die "Can't run `" . &printcmd (@cmd) . "'"; push @pids, [$pid, @cmd]; } else { open *PSIN, "<&FIN" or die "Can't dup: $!"; } # # Select, if necessary # # TODO: preselection, during the conversion from pdf? if (defined $select) { @cmd = ('psselect', "-p$select"); push @cmd, '-q' if defined $quiet; my $pid = open2 *PSSELECT, "<&PSIN", @cmd; push @pids, [$pid, @cmd]; } else { *PSSELECT = *PSIN; } # # Resize file to our paper # @cmd = ('psresize2.pl', "-p$paper", "-m$mresize"); push @cmd, "-c" if defined $crop; push @cmd, '-q' if defined $quiet; my $pid = open2 *PSRESIZE, "<&PSSELECT", @cmd; push @pids, [$pid, @cmd]; # Note: open2 closes the filehandles for us :) # # PSBook # if (defined $book) { @cmd = ('psbook'); push @cmd, '-q' if defined $quiet; my $pid = open2 *PSBOOK, "<&PSRESIZE", @cmd; push @pids, [$pid, @cmd]; } else { *PSBOOK = *PSRESIZE; } # # PSNup # if ($nup > 1) { @cmd = ('psnup', "-p$paper", "-m$mnup", "-$nup"); push @cmd, '-q' if defined $quiet; my $pid = open2 *PSOUT, "<&PSBOOK", @cmd; push @pids, [$pid, @cmd]; } else { *PSOUT = *PSBOOK; } # # Final file # if ($filetype eq "PDF") { # Convert back to PDF @cmd = ('ps2pdf', "-dEmbedAllFonts=true", "-sPAPERSIZE=$paper", '-', '-'); $pid = open2 ">&FOUT", "<&PSOUT", @cmd; push @pids, [$pid, @cmd]; } else { # cat > FOUT while () { print FOUT or die "Can't print: $!"; } } # Avoid zombies map { my ($pid, @cmd) = @$_; # print STDERR "PID: ", $pid, " Cmd: `", &printcmd (@cmd), "'"; my ($r,$v) = (waitpid ($pid, 0), $?); warn "Can't run `" . &printcmd (@cmd) . "'" if ($r != -1 and $v >> 8); } @pids; map { close $_ or die "Can't close: $!" } ( *READ, *FIN, *FOUT ); # Delete auxiliary files unlink @auxfiles; # ========================================================= # # In-place convert the given length to PostScript points # sub topoints { my $l = $_[0]; return unless defined $$l; $$l =~ /^([+-]?\d*\.?\d+)(\w*)$/ or die "Unable to parse `$$l'"; my $r = $1; if ($2 eq "" or $2 eq "pt") { # nothing } elsif ($2 eq "in") { $r *= 72; } elsif ($2 eq "cm") { $r *= 72/2.54; } elsif ($2 eq "mm") { $r *= 72/25.4; } else { die "Unknown unit: `$2'"; } $$l = floor ($r + .5); } # # Print a command just like you'd do in a shell # sub printcmd { my @cmd; for (@_) { my $s = $_; $s =~ s/"/\\"/; $s = "\"$s\"" if $s =~ /[ ()';#{}*?~&|`]/; push @cmd, $s; } join ' ', @cmd; }