summaryrefslogtreecommitdiffstats
path: root/pdftool.pl
diff options
context:
space:
mode:
authorGuilhem Moulin <guilhem.moulin@ens-lyon.org>2012-01-06 16:52:59 +0100
committerGuilhem Moulin <guilhem.moulin@ens-lyon.org>2012-01-06 16:52:59 +0100
commit8d5d61fc79ac8403272222f8921b33f95480b831 (patch)
tree6134b77966b05ef4f4d9b6aa8e7490692b89cd10 /pdftool.pl
parentf732cca08a17007c923ccaae3fbcb47175050bf9 (diff)
wibble; File::Spec; man
Diffstat (limited to 'pdftool.pl')
-rwxr-xr-xpdftool.pl185
1 files changed, 99 insertions, 86 deletions
diff --git a/pdftool.pl b/pdftool.pl
index ab52f2c..f2befe5 100755
--- a/pdftool.pl
+++ b/pdftool.pl
@@ -7,14 +7,15 @@
# See http://sam.zoy.org/wtfpl/COPYING for more details.
-$VERSION = "0.4, 12 May 2011";
+$VERSION = "0.5, 06 January 2012";
-use Getopt::Long qw(:config posix_default no_ignore_case gnu_compat
- bundling auto_version auto_help);
+use Getopt::Long qw /:config posix_default no_ignore_case gnu_compat
+ bundling auto_version auto_help/;
use Pod::Usage;
use IPC::Open3;
-use POSIX qw(floor);
-use Error qw(:try);
+use File::Spec::Functions qw /tmpdir catfile/;
+use POSIX qw /floor/;
+use Error qw /:try/;
use strict;
@@ -27,7 +28,7 @@ pdftool.pl - a PDF swiss army knife
B<pdftool.pl> [B<-w> I<width> ] [B<-h> I<heigth>] [B<-p> I<paper>]
[B<-W> I<width>] [B<-H> I<heigth>] [B<-P> I<paper>] [B<-s> I<pages>]
[B<-m> I<margin>] [B<-b> I<border>] [B<-c>] [B<--book>] [B<--column>]
-[B<-n> I<num>] [B<--screen>] [B<--pdf>] [B<-q>] [I<infile> [I<outfile>]]
+[B<-n> I<num>] [B<--no-rotate>] [B<--pdf>] [B<-q>] [I<infile> [I<outfile>]]
=head1 DESCRIPTION
@@ -36,22 +37,32 @@ input should be either a Portable Document Format (PDF) file, or a
PostScript file.
If no input file is given, or if a single hyphen-minus (I<->) is given as
-file name, B<PDFTool> will read the PDF or PostScript data from the standard
-input. In that case, and if the input data is in PDF format, an
-auxiliary file will be created (since the conversion from PDF to PS
-requires random access to the data), and removed afterwards. Also, if the crop
-option (B<-c>) is set, an auxiliary file will be created, and removed
+file name, B<PDFTool> reads the PDF or PostScript data from the standard
+input. If a PDF is sent to the standard input, an auxiliary file is
+created (because reading a PDF requires random access to the data), and removed
+afterwards. Also, if the option B<-c> (cropping) is set while the input is
+not a regular seekable file, an auxiliary file is created, and removed
afterwards.
+The input page size is by default guessed from the input document. However,
+the options B<-P>, B<-W> and B<-H> let you choose a specific input page
+size, while B<-c> makes B<PDFTool> ignore the input page size and
+calculate the minimal bounding box instead. The default output page size
+is I<a4>.
+
If no output file is given, or if a single hyphen-minus (I<->) is given as
-file name, B<PDFTool> will send the data to the standard output.
+file name, B<PDFTool> sends the data to the standard output. By
+default, B<PDFTool> outputs a PostScript document; see the B<--pdf> option to
+get a PDF instead.
-By default, B<PDFTool> rotates the pages in order to ensure that your pdf will
-be printable using your favorite duplex mode for portrait documents (Tumble if
-you prefer to turn the pages like those of a book). See the B<--screen>
-option to bypass this behavior.
+By default, B<PDFTool> rotates the pages in order to ensure that your
+document will always be printable using your favorite duplex mode for portrait
+documents (e.g., Tumble if you prefer to turn the pages like those of a
+book - "vertical folding"; regardless of the orientation of the document).
+See the B<--no-rotate> option to bypass this behavior, e.g., to read the
+output document on your screen.
-The document will be treated as follows:
+B<PDFTool> does the following passes on the input document:
=over 4
@@ -60,15 +71,17 @@ that is if all page numbers are relative to the begining of the
document, convert only the smallest interval that contains all the selected
pages),
-=item * Select the page range if necessary,
+=item * Select the page range if necessary (option B<-s>),
-=item * Calculate the minimal bounding box,
+=item * Calculate the minimal bounding box (option B<-c>),
-=item * Rearrange pages for printing books or booklets if necessary,
+=item * Rearrange pages for printing books or booklets if necessary
+(option B<--book>),
-=item * Put multiple pages per sheets,
+=item * Put multiple pages per sheet (option B<-n>), and
-=item * "Flatten" the output, and convert back to PDF if necessary.
+=item * "Flatten" the output, and convert back to PDF if necessary
+(option B<--pdf>).
=back
@@ -82,7 +95,7 @@ Specify the pages which are to be selected.
I<Pages> is a comma separated list of page ranges, each of which
may be a page number, or a page range of the form
I<first>-I<last>. If I<first> is omitted, the first page is assumed,
-and if I<last> is omitted, the last page is assumed.
+and if I<last> is omitted, the last page is assumed.
The prefix character `_' indicates that the page number is relative
to the end of the document, counting backwards. If just this
@@ -105,30 +118,30 @@ I<mm>. The default unit is I<pt>.
Specify the paper size of the output file, as an alternative to B<-w>
and B<-h>. Can be set to I<a0>, I<a1>, I<a2>, I<a3>, I<a4>, I<a5>, I<b5>,
I<letter>, I<legal>, I<ledger>, I<tabloid>, I<statement>, I<executive>,
-I<folio>, I<quarto>, or I<10x14>. The default output paper size is I<a4>.
+I<folio>, I<quarto>, or I<10x14>. The default output paper size is I<a4>.
=item B<-W> I<length>, B<--Width=>I<length>
Same as the option B<-w>, but for the input file. This option is ignored if
-the crop option (B<-c>) is set.
+the option B<-c> (cropping) is set.
=item B<-H> I<length>, B<--Height=>I<length>
Same as the option B<-h>, but for the input file. This option is ignored if
-the crop option (B<-c>) is set.
+the option B<-c> (cropping) is set.
=item B<-P> I<paper>, B<--Paper=>I<paper>
Same as the option B<-p>, but for the input file. By default,
B<PDFTool> will try to guess this value from the header of the file,
and will fail if the information is missing. This option is ignored if the
-crop option (B<-c>) is set.
+option B<-c> (cropping) is set.
=item B<-b> I<length>, B<--border=>I<length>
Add a margin around each logical page on a sheet. Possible units are I<pt>,
I<in>, I<cm> and I<mm>. The default unit is I<pt>. The default border is
-I<1cm> if the crop option (B<-c>) is set, and I<0> otherwise.
+I<1cm> if the option B<-c> (cropping) is set, and I<0> otherwise.
=item B<-m> I<length>, B<--margin=>I<length>
@@ -139,14 +152,14 @@ and I<mm>. The default unit is I<pt>. The default margin is I<0>.
If this option is set, the PostScript code will interpreted to calculate the
maximal effective bounding box. This operation may take time and be quite
-demanding for the CPU. See the note for the border option (B<-b>) above.
+demanding for the CPU. See the note for the option B<-b> above.
=item B<--book>
Rearrange pages for printing books or booklets. If your "default" duplex
mode (see B<DESCRIPTION>) is "NoTumble", you should either use C<lpr> with
the option "Tumble" manually, or consider the B<PDFTool> option
-B<--screen> instead.
+B<--no-rotate> instead.
=item B<-n> I<num>, B<--nup=>I<num>
@@ -154,18 +167,18 @@ Put multiple logical pages onto each physical sheet of paper.
If I<num> is less than 10, the option B<->I<num> may be used as an
alternative.
-=item B<--screen>
+=item B<--no-rotate>
By default, B<PDFTool> ensures that your pdf will be printable using
-your "default" duplex mode (see B<DESCRIPTION>). B<--screen> tries to
+your "default" duplex mode (see B<DESCRIPTION>). B<--no-rotate> tries to
make the output PDF ready to read on your computer instead. It has no
effect for portrait documents.
=item B<--pdf>
-By default, B<PDFTool>'s output is a PostScript file; use this flag if
+By default, B<PDFTool> outputs a PostScript file; use this flag if
you want a PDF instead. Note that since reading a PDF requires random
-access to the data, you won't be able to pipe the output to C<lpr> then.
+access to the data, you will not be able to pipe the output to C<lpr> then.
=item B<--column>
@@ -214,14 +227,13 @@ Requires GhostScript installed and available via the command C<gs>
=head1 AUTHOR
-Copyright 2010-2011 Guilhem Moulin. See the source for copying
+Copyright 2010-2012 Guilhem Moulin. See the source for copying
conditions.
=cut
-my $tmpdir = '/tmp';
my @gs = ('gs', '-dSAFER');
#
@@ -237,7 +249,7 @@ my $nup = 1;
my $column;
my $quiet;
my $man;
-my $screen;
+my $norotate;
my $pdfout;
GetOptions( "select|s=s" => \$select,
@@ -253,7 +265,7 @@ GetOptions( "select|s=s" => \$select,
"book" => \$book,
"pdf" => \$pdfout,
"nup|n=i" => \$nup,
- "screen" => \$screen,
+ "no-rotate" => \$norotate,
"1" => sub { $nup = 1 },
"2" => sub { $nup = 2 },
"3" => sub { $nup = 3 },
@@ -298,17 +310,17 @@ unless (defined $border) {
map {&topoints ($_)} ( \$outwidth, \$outheight,
\$inwidth, \$inheight,
\$margin, \$border );
-die "Margins are too big" if $outwidth <= $margin*2 or $outheight <= $margin*2;
+die "Margins are too big.\n" if $outwidth <= $margin*2 or $outheight <= $margin*2;
#
# Check options
#
-die "Bad page range: `$select'" if
+die "Bad page range: `" .$select. "'.\n" if
defined $select && not $select =~ /^(_?\d*-?_?\d*,)*_?\d*-?_?\d*$/;
-die "Bad nup: `$nup'" if
+die "Bad nup: `" .$nup. "'.\n" if
defined $nup && not ($nup =~ /^\d+$/ && $nup > 0);
@@ -318,7 +330,7 @@ die "Bad nup: `$nup'" if
#
my ($FIN, $FOUT);
if (defined $infile && $infile ne "-") {
- open $FIN, '<', $infile or die "Can't read `$infile': $!";
+ open $FIN, '<', $infile or die "Cannot read `" .$infile. "': $!\n";
} else {
undef $infile;
$FIN = *STDIN;
@@ -326,7 +338,7 @@ if (defined $infile && $infile ne "-") {
if (defined $outfile && $outfile ne "-") {
- open $FOUT, '>', "$outfile" or die "Can't create `$outfile': $!";
+ open $FOUT, '>', "$outfile" or die "Cannot create `" .$outfile. "': $!\n";
} else {
$FOUT = *STDOUT;
}
@@ -374,12 +386,12 @@ finally {
# Avoid zombies
map { my ($pid, @cmd) = @$_;
my ($r,$v) = (waitpid ($pid, 0), $?);
- warn "Can't run `" . &printcmd (@cmd) . "'"
+ warn "Warning: Cannot run `" .&printcmd (@cmd). "'.\n"
if ($r != -1 and $v >> 8);
} @pids;
# Close opened file handles
- map { close $_ or die "Can't close: $!" }
+ map { close $_ or die "Cannot close: $!" }
( $FIN, $FOUT );
# Delete auxiliary files
@@ -389,7 +401,7 @@ finally {
};
# Useless, but Perl doesn't see that this filehandle is used more than
-# one time
+# once
close IN; # automatically closed by `open3'
close OUT;
@@ -417,10 +429,10 @@ sub pdftops {
my $filetype;
my ($READ, $WRITE);
- pipe $READ, $WRITE or die "Can't pipe: $!";
+ pipe $READ, $WRITE or die "Cannot pipe: $!";
while (not (defined $filetype) && defined (my $l = <$IN>)) {
- print $WRITE ($l) or die "Can't print: $!";
+ print $WRITE ($l) or die "Cannot print: $!";
if (defined $l && $l =~ /^%!PS/) {
$filetype = "PS";
@@ -429,21 +441,21 @@ sub pdftops {
}
}
- die "Can't recognize the filetype" unless defined $filetype;
+ die "Cannot recognize the filetype.\n" unless defined $filetype;
unless (my $pid = fork) {
# Child: cat $IN > $WRITE in background
- die "Can't fork: $!" unless defined $pid;
- close $READ or die "Can't close: $!";
+ die "Cannot fork: $!" unless defined $pid;
+ close $READ or die "Cannot close: $!";
while (<$IN>) {
- print $WRITE ($_) or die "Can't print: $!";
+ print $WRITE ($_) or die "Cannot print: $!";
}
exit;
}
# Parent
- close $WRITE or die "Can't close: $!";
+ close $WRITE or die "Cannot close: $!";
return $READ if $filetype eq "PS";
@@ -456,15 +468,15 @@ sub pdftops {
# Need to copy the whole input to an auxiliary file, since
# conversion from PDF to PS requires random access to the data
- $infile = "$tmpdir/pdftool-stdin-$$." . lc $filetype;
+ $infile = catfile( tmpdir(), "pdftool-stdin-$$." . lc $filetype );
open my $AUX, '>', $infile
- or die "Can't write into `$infile': $!";
+ or die "Cannot write into `" .$infile. "': $!\n";
push @auxfiles, $infile;
# cat > $infile
while (<$READ>) {
- print $AUX ($_) or die "Can't print: $!";
+ print $AUX ($_) or die "Cannot print: $!";
}
close $AUX;
}
@@ -554,24 +566,24 @@ sub psbbox {
# The input is not seekable: have to create a seekable auxiliary
# file
- my $auxfile = "$tmpdir/pdftool-stdin-$$.ps";
+ my $auxfile = catfile( tmpdir(), "pdftool-stdin-$$.ps" );
open my $AUX, '>', $auxfile
- or die "Can't write into `$auxfile': $!";
+ or die "Cannot write into `" .$auxfile. "': $!\n";
push @auxfiles, $auxfile;
# cat > $auxfile
while (<$IN>) {
- print $AUX ($_) or die "Can't print: $!";
+ print $AUX ($_) or die "Cannot print: $!";
}
- close $AUX or die "Can't close: $!";
- close $IN or die "Can't close: $!";
+ close $AUX or die "Cannot close: $!";
+ close $IN or die "Cannot close: $!";
- open $IN, '<', $auxfile or die "Can't read `$auxfile': $!";
+ open $IN, '<', $auxfile or die "Cannot read `" .$auxfile. "': $!\n";
}
# Need to duplicate IN, since it will be closed in the parent process
- open *IN, '<&=', $IN or die "Can't fdopen: $!";
+ open *IN, '<&=', $IN or die "Cannot fdopen: $!";
my @cmd = (@gs, '-sDEVICE=bbox', '-dQUIET', '-dBATCH', '-dNOPAUSE', '-');
my $pid = open3 '<&IN', '>&OUT', *OUT, @cmd;
@@ -588,26 +600,26 @@ sub psbbox {
my $s = "[" . ++$p . "] ";
$c += length $s;
if ($c >= 80) {
- print LOG "\n" or die "Can't print: $!";
+ print LOG "\n" or die "Cannot print: $!";
$c = length $s;
}
- print LOG $s or die "Can't print: $!";
+ print LOG $s or die "Cannot print: $!";
}
}
}
- close OUT or die "Can't close: $!";
- print LOG "\n" or die "Can't print: $!" unless defined $quiet;
+ close OUT or die "Cannot close: $!";
+ print LOG "\n" or die "Cannot print: $!" unless defined $quiet;
# No zombie processes
waitpid $pid, 0;
- die "Can't run `" . &printcmd (@cmd) . "'" if $? >> 8;
+ die "Cannot run `" .&printcmd (@cmd). "'.\n" if $? >> 8;
- die "Error while calculating bounding box"
+ die "Error while calculating bounding box.\n"
if ($x0 >= $x1 || $y0 >= $y1);
@bbox = ($x0, $y0, $x1, $y1);
# Let's go back to the beginning of the input
- seek $IN, 0, 0 or die "Can't seek: $!";
+ seek $IN, 0, 0 or die "Cannot seek: $!";
$OUT = $IN;
} elsif (defined $inwidth and defined $inheight) {
@@ -620,29 +632,29 @@ sub psbbox {
# To avoid to seek into IN, it gonna be copied from WRITE to READ
# in background, once the Bounding Box has been read
my ($READ, $WRITE);
- pipe $READ, $WRITE or die "Can't pipe: $!";
+ pipe $READ, $WRITE or die "Cannot pipe: $!";
while (not (@bbox) && defined (my $l = <$IN>)) {
- print $WRITE ($l) or die "Can't print: $!";
+ print $WRITE ($l) or die "Cannot print: $!";
@bbox = ($1, $2, $3, $4)
if ($l =~ m/^\%\%BoundingBox: (\d+) (\d+) (\d+) (\d+)/);
}
- die "Cannot guess input page size" unless @bbox;
+ die "Cannot guess input page size.\n" unless @bbox;
unless (my $pid = fork) {
# Child: cat IN > WRITE in background
- die "Can't fork: $!" unless defined $pid;
- close $READ or die "Can't close: $!";;
+ die "Cannot fork: $!" unless defined $pid;
+ close $READ or die "Cannot close: $!";;
while (<$IN>) {
- print $WRITE ($_) or die "Can't print: $!";
+ print $WRITE ($_) or die "Cannot print: $!";
}
exit;
}
# Parent
- close $WRITE or die "Can't close: $!";
+ close $WRITE or die "Cannot close: $!";
$OUT = $READ;
}
@@ -682,7 +694,7 @@ sub psnup {
if ((($bbox[2]-$bbox[0] > $bbox[3]-$bbox[1])
and not ($outwidth-2*$margin > $outheight-2*$margin))
or
- (defined $screen
+ (defined $norotate
and not ($bbox[2]-$bbox[0] > $bbox[3]-$bbox[1])
and ($outwidth-2*$margin > $outheight-2*$margin))) {
($outheight, $outwidth) = ($outwidth, $outheight);
@@ -737,7 +749,7 @@ sub psnup {
}
# Fail if nothing better than worst tolerance was found
- die "Can't find acceptable layout for $nup-up" if $best == $tolerance;
+ die "Cannot find acceptable layout for $nup-up.\n" if $best == $tolerance;
#
@@ -789,7 +801,7 @@ sub psnup {
($ow,$oh) = ($oh,$ow) if $rotate%2;
my $pagespecs;
- if (defined $screen || $ow < $oh) {
+ if (defined $norotate || $ow < $oh) {
$pagespecs = $nup . ':' . join ('+', @ospecs);
} else {
$pagespecs = 2*$nup . ':' . join ('+', @ospecs)
@@ -809,7 +821,8 @@ sub psnup {
#
-# Final file: Convert back to PDF
+# Final file: set up the correct orientation/page size, and convert to
+# PDF if necessary
#
sub pswrite {
my ($IN, $OUT, $landscape, $rotate) = @_;
@@ -818,8 +831,8 @@ sub pswrite {
($ow,$oh) = ($oh,$ow) if $rotate%2;
my $pagedevice;
- if (defined $screen || $oh < $oh || $landscape) {
- $rotate = ($rotate+1)%4 if not (defined $screen) and $oh < $ow;
+ if (defined $norotate || $oh < $oh || $landscape) {
+ $rotate = ($rotate+1)%4 if not (defined $norotate) and $oh < $ow;
$pagedevice = "/Orientation $rotate /PageSize [$outwidth $outheight]";
} else {
$pagedevice = "/PageSize [$outwidth $outheight]";
@@ -846,7 +859,7 @@ sub topoints {
my $l = $_[0];
return unless defined $$l;
- $$l =~ /^([+-]?\d*\.?\d+)(\w*)$/ or die "Unable to parse `$$l'";
+ $$l =~ /^([+-]?\d*\.?\d+)(\w*)$/ or die "Unable to parse `" .$$l. "'.\n";
my $r = $1;
if ($2 eq "" or $2 eq "pt") {
@@ -858,7 +871,7 @@ sub topoints {
} elsif ($2 eq "mm") {
$r *= 72/25.4;
} else {
- die "Unknown unit: `$2'";
+ die "Unknown unit: `$2'.\n";
}
$$l = floor ($r + .5);
}
@@ -905,7 +918,7 @@ sub papersize {
} elsif ($p eq "10x14") {
($$w,$$h) = ("10in", "14in");
} else {
- die "Unknown paper size: `$p'";
+ die "Unknown paper size: `" .$p. "'.\n";
}
}