summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rwxr-xr-xpdftool.pl409
1 files changed, 360 insertions, 49 deletions
diff --git a/pdftool.pl b/pdftool.pl
index a85e331..4c4b8ba 100755
--- a/pdftool.pl
+++ b/pdftool.pl
@@ -5,6 +5,7 @@ use Getopt::Long qw(:config posix_default no_ignore_case gnu_compat
bundling auto_version auto_help);
use Pod::Usage;
use IPC::Open2;
+use IPC::Open3;
use POSIX qw(floor);
use strict;
@@ -15,8 +16,9 @@ pdftool.pl - a PDF swiss army knife
=head1 SYNOPSIS
-B<pdftool.pl> [-s I<pages>] [-p I<paper>] [-m I<margin>] [-c] [-b]
-[-n I<nup>] [-q] [I<infile> [I<outfile>]]
+B<pdftool.pl> [-s I<pages>] [-w I<width>] [-h I<height>] [-p I<paper>]
+[-W I<width>] [-H I<height>] [-P I<paper>] [-m I<margin>]
+[-c] [-b] [-n I<nup>] [-q] [I<infile> [I<outfile>]]
=head1 DESCRIPTION
@@ -70,13 +72,40 @@ The prefix character `_' indicates that the page number is relative
to the end of the document, counting backwards. If just this
character with no page number is used, a blank page will be inserted.
+=item B<-w, --width>
+
+Specify the width of the output file. The width is ignored unless the
+height is specified as well. The known units are B<pt>, B<in>, B<cm> and
+B<mm>. The default unit is B<pt>.
+
+=item B<-h, --height>
+
+Specify the height of the output file. The height is ignored unless the
+width is specified as well. The known units are B<pt>, B<in>, B<cm> and
+B<mm>. The default unit is B<pt>.
+
=item B<-p, --paper>
-Specify the paper size of the output file.
-Can be set to B<a0>, B<a1>, B<a2>, B<a3>, B<a4>, B<a5>, B<b5>,
+Specify the paper size of the output file, as an alternative to B<-w>
+and B<-h>. Can be set to B<a0>, B<a1>, B<a2>, B<a3>, B<a4>, B<a5>, B<b5>,
B<letter>, B<legal>, B<tabloid>, B<statement>, B<executive>, B<folio>,
B<quarto>, or B<10x14>. The default output paper size is B<a4>.
+=item B<-W, --Width>
+
+Same as the option B<-w>, but for the input file.
+
+=item B<-H, --Height>
+
+Same as the option B<-h>, but for the input file.
+
+=item B<-P, --Paper>
+
+Same as the option B<-p>, but for the input file. By default,
+I<PDFTool> will try to guess this value from the header of the file,
+and fail if the information is missing. This option has no effect if
+the crop option (B<-c>) is set.
+
=item B<-m, --margin>
Add a margin to the output file. Possible units are B<pt>, B<in>, B<cm>
@@ -102,9 +131,9 @@ Rearrange pages for printing books or booklets.
=item B<-n, --nup>
-Puts multiple logical pages onto each physical sheet of paper. The inner
-margin might be same as the outer one (depending on the booklet option
-B<-b>), see B<-m> for details.
+Puts multiple logical pages onto each physical sheet of paper; the number
+of pages per sheet, I<nup>, has to be a power of two. The inner margin
+might be the same as the outer one (depending on the booklet option
+B<-b>), see B<-m> for details.
If I<nup> is less than 10, the option B<->I<nup> may be used as an
alternative.
@@ -148,12 +177,12 @@ Public domain, (c) Guilhem Moulin.
=head1 VERSION
-Version: 0.1, 25 September 2010
+Version: 0.2, 12 December 2010
=cut
# TODO: inline it in the header
-$main::VERSION = "0.1, 25 September 2010";
+$main::VERSION = "0.2, 12 December 2010";
@@ -164,17 +193,23 @@ my $tmpdir = '/tmp';
#
my $select;
-my $paper;
+my ($outwidth,$outheight, $inwidth,$inheight);
my $margin;
my $crop;
my $book;
my $nup = 1;
+my $rotdir = 'L';
my $quiet;
my $man;
# TODO: choose the output type
GetOptions( "select|s=s" => \$select,
- "paper|p=s" => \$paper,
+ "w|width=s" => \$outwidth,
+ "h|height=s" => \$outheight,
+ "p|paper=s" => sub { &papersize ($_[1],\$outwidth,\$outheight) },
+ "W|Width=s" => \$inwidth,
+ "H|Height=s" => \$inheight,
+ "P|Paper=s" => sub { &papersize ($_[1],\$inwidth,\$inheight) },
"margin|m=s" => \$margin,
"crop|c" => \$crop,
"book|b" => \$book,
@@ -204,18 +239,20 @@ my ($infile, $outfile) = @ARGV;
# Default values
#
-# Default output papersize
-$paper = "a4" unless defined $paper;
-
-
# Default margin
unless (defined $margin) {
$margin = 0;
$margin = "1cm" if defined $crop;
}
+# Default output papersize
+&papersize ("a4", \$outwidth, \$outheight)
+ unless (defined $outwidth and defined $outheight);
+
# Default unit: PostScript point
-&topoints( \$margin );
+map {&topoints ($_)} ( \$outwidth, \$outheight,
+ \$inwidth, \$inheight,
+ \$margin );
# Inner and outer margins
my ($mresize, $mnup) = (0,0);
@@ -226,6 +263,12 @@ if ($nup > 1 && not defined $book) {
$mresize = $margin;
}
+# Check that $nup is a power of two.  From here on, $nup no longer holds
+# the number of logical pages per sheet but the matching exponent
+# (i.e. 1<<$nup logical pages per sheet).
+# TODO: would be nice to generalize $nup to any integer that psnup would
+# accept.
+my $i = 0;
+$i++ while (1 << $i) < $nup;
+die "nup has to be a power of two" unless (1 << $i) <= $nup;
+$nup = $i;
# Open input and output files
@@ -248,6 +291,8 @@ if (defined $outfile && $outfile ne "-") {
*FOUT = *STDOUT;
}
+*LOG = *STDERR;
+
#
@@ -319,45 +364,192 @@ if ($filetype eq "PDF") {
}
close FINAUX;
}
+
+    # Page span handed to pdftops (-f / -l); left undefined when the whole
+    # document has to be converted.
+    my ($first, $last);
+    if (defined $select) {
+        # Convert to PS only the pages we are interested in: find the
+        # lowest and highest page numbers named in the selection, let
+        # pdftops extract just that span, and renumber the selection so
+        # that it still designates the same pages afterwards.
+        my ($lowest, $highest);         # Extreme explicit page numbers
+        my ($open_start, $open_end);    # Is there a `-N' / `N-' range?
+        my $renumberable = 1;
+        my @ranges;
+
+        for my $range (split / *, */, $select) {
+            my @pages;
+            if ($range =~ /^ *(\d+) *$/) {
+                # Single page: must not be turned into the range `N-'
+                @pages = ($1);
+            } elsif ($range =~ /^ *(\d*) *- *(\d*) *$/) {
+                # Range; a missing bound stands for the first / last page
+                @pages = ($1, $2);
+                $open_start = 1 unless length $1;
+                $open_end   = 1 unless length $2;
+            } else {
+                # Pages relative to the end (`_N'), blank pages (`_') or
+                # anything else that cannot be renumbered without knowing
+                # the page count: keep the selection as it is and convert
+                # the whole document.
+                $renumberable = 0;
+                last;
+            }
+            push @ranges, \@pages;
+
+            for my $page (grep { length } @pages) {
+                $lowest  = $page if !defined $lowest  || $page < $lowest;
+                $highest = $page if !defined $highest || $page > $highest;
+            }
+        }
+
+        if ($renumberable) {
+            # An open-ended range reaches the first / last page of the
+            # document, hence no bound on that side.  Page 0 does not
+            # exist and is never used as the first page.
+            $first = $lowest
+                if defined $lowest && $lowest > 0 && !$open_start;
+            $last  = $highest
+                if defined $highest && !$open_end;
+
+            # Calculate the new page range, relative to the first page
+            # that is going to be converted
+            my $shift = defined $first ? $first - 1 : 0;
+            $select = join ',', map {
+                join '-', map { length($_) ? $_ - $shift : '' } @$_
+            } @ranges;
+        }
+    }
# Convert to PS
- @cmd = ('pdftops', "$infile", '-');
+ @cmd = ('pdftops', '-origpagesizes', "$infile", '-');
+ push @cmd, '-f', $first if defined $first;
+ push @cmd, '-l', $last if defined $last;
push @cmd, '-q' if defined $quiet;
- my $pid = open PSIN, "-|", @cmd
+ my $pid = open *PSIN, "-|", @cmd
or die "Can't run `" . &printcmd (@cmd) . "'";
push @pids, [$pid, @cmd];
} else {
open *PSIN, "<&FIN" or die "Can't dup: $!";
}
+open *IN, "<&PSIN" or die "Can't dup: $!";
#
# Select, if necessary
#
-# TODO: preselection, during the conversion from pdf?
if (defined $select) {
@cmd = ('psselect', "-p$select");
push @cmd, '-q' if defined $quiet;
- my $pid = open2 *PSSELECT, "<&PSIN", @cmd;
+ my $pid = open2 *OUT, "<&IN", @cmd;
push @pids, [$pid, @cmd];
+ open *IN, "<&OUT" or die "Can't dup: $!";
+}
+
+
+
+#
+# Bounding box
+#
+my @bbox;
+if (defined $crop) {
+    # Calculate the maximal bounding box, i.e. the smallest box containing
+    # the bounding box that Ghostscript's `bbox' device reports for each
+    # page.  The input is read twice (once here, once by the rest of the
+    # pipeline), so it has to be seekable.
+
+    unless (seek IN, 0, 1) {
+        # The input is not seekable: have to create a seekable auxiliary file
+
+        my $auxfile = "$tmpdir/psresize-stdin-$$.ps";
+
+        open AUXFD, '>', "$auxfile"
+            or die "Can't write into `$auxfile': $!";
+        push @auxfiles, $auxfile;
+
+        # cat > $auxfile
+        while (<IN>) {
+            print AUXFD or die "Can't print: $!";
+        }
+        close AUXFD or die "Can't close: $!";
+        close IN or die "Can't close: $!";
+
+        open IN, '<', "$auxfile" or die "Can't read `$auxfile': $!";
+    }
+
+    # Need to duplicate IN, since it will be closed in the parent process
+    open *GSIN, '<&IN' or die "Can't dup: $!";
+
+    # The `%%BoundingBox' lines are read from GSOUT, which open3 connects
+    # to the error output of the child.
+    @cmd = ('gs', '-sDEVICE=bbox', '-dBATCH', '-dNOPAUSE', '-');
+    my $pid = open3 "<&GSIN", ">&GSOUT", *GSOUT, @cmd;
+
+    my ($p,$c) = (0,0); # Page & character counter
+    my ($x0, $y0, $x1, $y1) = (1<<16, 1<<16, -(1<<16), -(1<<16));
+    while (<GSOUT>) {
+        if ($_ =~ m/^\%\%BoundingBox: (\d+) (\d+) (\d+) (\d+)/) {
+            $x0 = $1 if $1 < $x0;
+            $y0 = $2 if $2 < $y0;
+            $x1 = $3 if $3 > $x1;
+            $y1 = $4 if $4 > $y1;
+            unless (defined $quiet) {
+                # Progress report: `[page] ', wrapped at 80 characters
+                my $s = "[" . ++$p . "] ";
+                $c += length $s;
+                if ($c >= 80) {
+                    print LOG "\n" or die "Can't print: $!";
+                    $c = length $s;
+                }
+                print LOG $s or die "Can't print: $!";
+            }
+        }
+    }
+    close GSOUT or die "Can't close: $!";
+    print LOG "\n" or die "Can't print: $!" unless defined $quiet;
+
+    # No zombie processes
+    waitpid $pid, 0;
+    die "Can't run `" . &printcmd (@cmd) . "'" if $? >> 8;
+
+    # The initial values are still there if no bounding box was reported
+    die "Error when calculating bounding box" if ($x0 >= $x1 || $y0 >= $y1);
+    @bbox = ($x0, $y0, $x1, $y1);
+
+    # Let's go back to the beginning of the input
+    seek IN, 0, 0 or die "$!";
+
+
+} elsif (defined $inwidth and defined $inheight) {
+ @bbox = (0, 0, $inwidth, $inheight);
} else {
- *PSSELECT = *PSIN;
+    # Guess page size from the input file
+
+    # Read the input up to (and including) the first `%%BoundingBox'
+    # comment.  The lines are kept in memory rather than written to the
+    # pipe right away: nobody reads the pipe yet, so a long header would
+    # fill it up and block us forever.
+    my @header;
+    while (!@bbox && defined (my $line = <IN>)) {
+        push @header, $line;
+        @bbox = ($1, $2, $3, $4)
+            if ($line =~ m/^\%\%BoundingBox: (-?\d+) (-?\d+) (-?\d+) (-?\d+)/);
+    }
+
+    die "Cannot guess input page size!" unless @bbox;
+
+    # To avoid seeking in IN, the lines consumed above and the remainder
+    # of IN are copied into WRITE in the background; READ then takes the
+    # place of IN.
+    pipe *READ, *WRITE or die "Can't pipe: $!";
+
+    my $pid = fork;
+    die "Can't fork: $!" unless defined $pid;
+    unless ($pid) {
+        # Child: cat header IN > WRITE in background
+        close READ or die "Can't close: $!";
+
+        print WRITE @header or die "Can't print: $!";
+        while (<IN>) {
+            print WRITE or die "Can't print: $!";
+        }
+        close WRITE or die "Can't close: $!";
+        exit;
+    }
+    # Parent
+
+    close WRITE or die "Can't close: $!";
+    close IN or die "Can't close: $!";
+
+    open *IN, "<&READ" or die "Can't dup: $!";
}
#
-# Resize file to our paper
+# Calculate PStoPS specification
#
-@cmd = ('psresize2.pl', "-p$paper", "-m$mresize");
-push @cmd, "-c" if defined $crop;
-push @cmd, '-q' if defined $quiet;
+if (($outwidth > $bbox[3] - $bbox[1]) xor ($bbox[2] - $bbox[0] > $outheight)) {
+ ($outwidth, $outheight) = ($outheight, $outwidth);
+}
+my ($x0,$x1) = &calculate_coordinates($outwidth , $margin);
+my ($y0,$y1) = &calculate_coordinates($outheight, $margin);
-my $pid = open2 *PSRESIZE, "<&PSSELECT", @cmd;
-push @pids, [$pid, @cmd];
-# Note: open2 closes the filehandles for us :)
+my $rotation;
+my $spec = 0 . &calc_pstops_page(@bbox, $x0, $y0, $x1, $y1);
+
+
+#
+# Run the program and filter the output
+#
+@cmd = ('pstops', "-w$outwidth", "-h$outheight", "$spec");
+push @cmd, '-q' if defined $quiet;
+my $pid = open3 "<&IN", *OUT, ">&LOG", @cmd;
+open *IN, "<&OUT" or die "Can't dup: $!";
@@ -368,10 +560,9 @@ if (defined $book) {
@cmd = ('psbook');
push @cmd, '-q' if defined $quiet;
- my $pid = open2 *PSBOOK, "<&PSRESIZE", @cmd;
+ my $pid = open2 *OUT, "<&IN", @cmd;
+ open *IN, "<&OUT" or die "Can't dup: $!";
push @pids, [$pid, @cmd];
-} else {
- *PSBOOK = *PSRESIZE;
}
@@ -379,33 +570,33 @@ if (defined $book) {
#
# PSNup
#
-if ($nup > 1) {
- @cmd = ('psnup', "-p$paper", "-m$mnup", "-$nup");
+if (1<<$nup > 1) {
+ my ($inwidth, $inheight) = ($outwidth, $outheight);
+ if ($nup % 2) {
+ ($outwidth, $outheight) = ($outheight, $outwidth);
+ }
+ $nup = 1<<$nup;
+ @cmd = ('psnup', "-W$inwidth", "-H$inheight",
+ "-w$outwidth", "-h$outheight",
+ "-m$mnup", "-$nup");
push @cmd, '-q' if defined $quiet;
- my $pid = open2 *PSOUT, "<&PSBOOK", @cmd;
+ my $pid = open2 *OUT, "<&IN", @cmd;
+ open *IN, "<&OUT" or die "Can't dup: $!";
push @pids, [$pid, @cmd];
-} else {
- *PSOUT = *PSBOOK;
}
#
-# Final file
+# Final file: Convert back to PDF
#
-if ($filetype eq "PDF") {
- # Convert back to PDF
-
- @cmd = ('ps2pdf', "-dEmbedAllFonts=true", "-sPAPERSIZE=$paper", '-', '-');
- $pid = open2 ">&FOUT", "<&PSOUT", @cmd;
- push @pids, [$pid, @cmd];
-} else {
- # cat > FOUT
- while (<PSOUT>) {
- print FOUT or die "Can't print: $!";
- }
-}
+@cmd = ('gs', "-sDEVICE=pdfwrite", "-sOutputFile=%stdout%", "-dBATCH",
+ "-dNOPAUSE", "-dAutoRotatePages=/None",
+ "-c", "<< /Orientation 0 /PageSize [$outwidth $outheight] >> setpagedevice",
+ "-f", "-");
+$pid = open2 ">&FOUT", "<&IN", @cmd;
+push @pids, [$pid, @cmd];
# Avoid zombies
@@ -425,10 +616,86 @@ unlink @auxfiles;
+# Useless, but Perl doesn't see that this filehandle is used more than
+# one time (and even automatically closed by `open3')
+exit;
+close GSIN;
+
+
# =========================================================
#
+# Calculate an item of the pstops specification
+#
+# Arguments: the corners (x0, y0, x1, y1) of the source box, followed by
+# the corners (x0, y0, x1, y1) of the target box.
+# Returns the string `<rot>@<scale>(<movex>,<movey>)' that fits the source
+# box, centered, into the target box; <rot> is the file-level $rotdir when
+# the box has to be rotated, and empty otherwise.
+# Dies if the source box has no width or no height.
+#
+sub calc_pstops_page {
+    my ($fx0, $fy0, $fx1, $fy1,
+        $tx0, $ty0, $tx1, $ty1) = @_;
+
+    # From and to width / height
+    my ($wf, $hf) = ($fx1 - $fx0, $fy1 - $fy0);
+    my ($wt, $ht) = ($tx1 - $tx0, $ty1 - $ty0);
+
+    # The scale factors below divide by the source dimensions
+    die "Cannot scale an empty source box" if $wf == 0 || $hf == 0;
+
+    # Check if rotation required (in our case, should always be 0)
+    my $rotation = (($wf > $hf) xor ($wt > $ht));
+
+    # Rotation flag of this item.  $rotdir itself is left alone, so that
+    # the direction is still known the next time this function is called.
+    my $dir = $rotation ? $rotdir : '';
+
+    # Scale factor width / height
+    my ($sw, $sh);
+    if ($rotation) {
+        ($sw, $sh) = ($ht / $wf, $wt / $hf);
+    } else {
+        ($sw, $sh) = ($wt / $wf, $ht / $hf);
+    }
+
+    # We take the smallest scale
+    my $scale = ($sw > $sh) ? $sh : $sw;
+
+    # Calculate the centers of the boxes
+    my ($cxf, $cyf) = ( .5 * ($fx0 + $fx1), .5 * ($fy0 + $fy1) );
+    my ($cxt, $cyt) = ( .5 * ($tx0 + $tx1), .5 * ($ty0 + $ty1) );
+
+    # First, PStoPs scales, then rotates, then moves: apply the first two
+    # steps to the source center, the move is what is left to reach the
+    # target center
+    ($cxf, $cyf) = ($cxf * $scale, $cyf * $scale);
+    if ($rotation) {
+        if ($dir eq 'L') {
+            ($cxf, $cyf) = (-$cyf, $cxf);
+        } else {
+            ($cxf, $cyf) = ($cyf, -$cxf);
+        }
+    }
+    my ($movex, $movey) = ($cxt - $cxf, $cyt - $cyf);
+
+    # Generate the summary
+    return sprintf( '%s@%.3f(%.3f,%.3f)', $dir, $scale, $movex, $movey);
+}
+
+
+
+#
+# Calculate the beginning and ending coordinates of the area left on a
+# side of the given length once the margin has been shaved off at both
+# ends. Both coordinates are rounded to integers.
+#
+sub calculate_coordinates {
+    my ($length, $margin) = @_;
+
+    # Far edge of the usable area, and extent of that area
+    my $far_edge = $length - $margin;
+    my $usable   = $far_edge - $margin;
+
+    # The near edge is derived from the rounded far edge
+    my $end = round($far_edge);
+    return ( round($end - $usable), $end );
+}
+
+
+
+#
+# Round a float number to the nearest integer (halves are rounded up)
+#
+sub round {
+    my ($number) = @_;
+    return floor($number + 0.5);
+}
+
+
+
+#
# In-place convert the given length to PostScript points
#
sub topoints {
@@ -453,6 +720,49 @@ sub topoints {
}
+
+#
+# In-place set the given width and height to the predefined papersize
+#
+# Arguments: the (case-insensitive) name of the paper size, followed by
+# references to the scalars receiving the width and the height, as
+# lengths with a unit.
+# Dies if the paper size is unknown.
+#
+sub papersize {
+    my ($p,$w,$h) = @_;
+    $p = lc $p;
+
+    my %size_of = (
+        "a0"        => [ "841mm",  "1189mm" ],
+        "a1"        => [ "594mm",  "841mm"  ],
+        "a2"        => [ "420mm",  "594mm"  ],
+        "a3"        => [ "297mm",  "420mm"  ],
+        "a4"        => [ "210mm",  "297mm"  ],
+        "a5"        => [ "148mm",  "210mm"  ],
+        # Advertised in the documentation of -p. JIS B5, which should be
+        # the size PSUtils calls `b5' (516pt x 729pt).
+        "b5"        => [ "182mm",  "257mm"  ],
+        "letter"    => [ "8.5in",  "11in"   ],
+        "legal"     => [ "8.5in",  "14in"   ],
+        "tabloid"   => [ "11in",   "17in"   ],
+        "statement" => [ "5.5in",  "8.5in"  ],
+        "executive" => [ "7.25in", "10.5in" ],
+        "folio"     => [ "8.27in", "13in"   ],
+        "quarto"    => [ "9in",    "11in"   ],
+        "10x14"     => [ "10in",   "14in"   ],
+    );
+
+    my $size = $size_of{$p}
+        or die "Unknown paper size: `$p'";
+    ($$w,$$h) = @$size;
+    return;
+}
+
+
+
#
# Print a command just like you'd do in a shell
#
@@ -468,3 +778,4 @@ sub printcmd {
join ' ', @cmd;
}
+