diff options
-rwxr-xr-x | pdftool.pl | 409 |
1 files changed, 360 insertions, 49 deletions
@@ -5,6 +5,7 @@ use Getopt::Long qw(:config posix_default no_ignore_case gnu_compat bundling auto_version auto_help); use Pod::Usage; use IPC::Open2; +use IPC::Open3; use POSIX qw(floor); use strict; @@ -15,8 +16,9 @@ pdftool.pl - a PDF swiss army knife =head1 SYNOPSIS -B<pdftool.pl> [-s I<pages>] [-p I<paper>] [-m I<margin>] [-c] [-b] -[-n I<nup>] [-q] [I<infile> [I<outfile>]] +B<pdftool.pl> [-s I<pages>] [-w I<width>] [-h I<height>] [-p I<paper>] +[-W I<width>] [-H I<height>] [-P I<paper>] [-m I<margin>] +[-c] [-b] [-n I<nup>] [-q] [I<infile> [I<outfile>]] =head1 DESCRIPTION @@ -70,13 +72,40 @@ The prefix character `_' indicates that the page number is relative to the end of the document, counting backwards. If just this character with no page number is used, a blank page will be inserted. +=item B<-w, --width> + +Specify the width of the output file. If the height is not specified as +well, the width will be ignored. The known units are B<pt>, B<in>, B<cm> and +B<mm>. The default unit is B<pt>. + +=item B<-h, --height> + +Specify the height of the output file. If the width is not specified as +well, the height will be ignored. The known units are B<pt>, B<in>, B<cm> and +B<mm>. The default unit is B<pt>. + =item B<-p, --paper> -Specify the paper size of the output file. -Can be set to B<a0>, B<a1>, B<a2>, B<a3>, B<a4>, B<a5>, B<b5>, +Specify the paper size of the output file, as an alternative to B<-w> +and B<-h>. Can be set to B<a0>, B<a1>, B<a2>, B<a3>, B<a4>, B<a5>, B<b5>, B<letter>, B<legal>, B<tabloid>, B<statement>, B<executive>, B<folio>, B<quarto>, or B<10x14>. The default output paper size is B<a4>. +=item B<-W, --Width> + +Same as the option B<-w>, but for the input file. + +=item B<-H, --Height> + +Same as the option B<-h>, but for the input file. + +=item B<-P, --Paper> + +Same as the option B<-p>, but for the input file. By default, +I<PDFTool> will try to guess this value from the header of the file, +and fail if the information is missing. 
This option is useless if the +crop option (B<-c>) is set. + =item B<-m, --margin> Add a margin to the output file. Possible units are B<pt>, B<in>, B<cm> @@ -102,9 +131,9 @@ Rearrange pages for printing books or booklets. =item B<-n, --nup> -Puts multiple logical pages onto each physical sheet of paper. The inner -margin might be same as the outer one (depending on the booklet option -B<-b>), see B<-m> for details. +Puts multiple logical pages onto each physical sheet of paper; I<nup> has +to be a power of two. The inner margin might be the same as the outer one +(depending on the booklet option B<-b>), see B<-m> for details. If I<nup> is less than 10, the option B<->I<nup> may be used as an alternative. @@ -148,12 +177,12 @@ Public domain, (c) Guilhem Moulin. =head1 VERSION -Version: 0.1, 25 September 2010 +Version: 0.2, 12 December 2010 =cut # TODO: inline it in the header -$main::VERSION = "0.1, 25 September 2010"; +$main::VERSION = "0.2, 12 December 2010"; @@ -164,17 +193,23 @@ my $tmpdir = '/tmp'; # my $select; -my $paper; +my ($outwidth,$outheight, $inwidth,$inheight); my $margin; my $crop; my $book; my $nup = 1; +my $rotdir = 'L'; my $quiet; my $man; # TODO: choose the output type GetOptions( "select|s=s" => \$select, - "paper|p=s" => \$paper, + "w|width=s" => \$outwidth, + "h|height=s" => \$outheight, + "p|paper=s" => sub { &papersize ($_[1],\$outwidth,\$outheight) }, + "W|Width=s" => \$inwidth, + "H|Height=s" => \$inheight, + "P|Paper=s" => sub { &papersize ($_[1],\$inwidth,\$inheight) }, "margin|m=s" => \$margin, "crop|c" => \$crop, "book|b" => \$book, @@ -204,18 +239,20 @@ my ($infile, $outfile) = @ARGV; # Default values # -# Default output papersize -$paper = "a4" unless defined $paper; - - # Default margin unless (defined $margin) { $margin = 0; $margin = "1cm" if defined $crop; } +# Default output papersize +&papersize ("a4", \$outwidth, \$outheight) + unless (defined $outwidth and defined $outheight); + # Default unit: PostScript point -&topoints( \$margin ); 
+map {&topoints ($_)} ( \$outwidth, \$outheight, + \$inwidth, \$inheight, + \$margin ); # Inner and outer margins my ($mresize, $mnup) = (0,0); @@ -226,6 +263,12 @@ if ($nup > 1 && not defined $book) { $mresize = $margin; } +# TODO: would be nice to generalize $nup to any integer that psnup would +# accept. +my $i = -1; +while (1<<++$i < $nup) {}; +die "nup has to be a power of two" if 1<<$i > $nup; +$nup = $i; # Open input and output files @@ -248,6 +291,8 @@ if (defined $outfile && $outfile ne "-") { *FOUT = *STDOUT; } +*LOG = *STDERR; + # @@ -319,45 +364,192 @@ if ($filetype eq "PDF") { } close FINAUX; } + + my ($first, $last); + if (defined $select) { + # Convert to PS only the pages we are interested in + ($first, $last) = (1<<16,-(1<<16)); + for (split / *, */, $select) { + my ($rmin, $rmax) = split (/ *- */, $_); + undef $first if defined $rmin && not $rmin; + undef $last if defined $rmax && not $rmax; + if (defined $rmin && $rmin) { + $first = $rmin if defined $first && $rmin < $first; + $last = $rmin if defined $last && $rmin > $last; + } + if (defined $rmax && $rmax) { + $first = $rmax if defined $first && $rmax < $first; + $last = $rmax if defined $last && $rmax > $last; + } + } + + # Calculate the new page range + my @newselect; + for (split / *, */, $select) { + my ($rmin, $rmax) = split / *- */, $_; + $rmin -= $first-1 if defined $first && defined $rmin && $rmin; + $rmax -= $first-1 if defined $first && defined $rmax && $rmax; + my $r = ""; + $r .= $rmin if defined $rmin; + $r .= "-"; + $r .= $rmax if defined $rmax; + print "$r\n"; + push @newselect, $r; + } + + $select = join ',', @newselect; + } # Convert to PS - @cmd = ('pdftops', "$infile", '-'); + @cmd = ('pdftops', '-origpagesizes', "$infile", '-'); + push @cmd, '-f', $first if defined $first; + push @cmd, '-l', $last if defined $last; push @cmd, '-q' if defined $quiet; - my $pid = open PSIN, "-|", @cmd + my $pid = open *PSIN, "-|", @cmd or die "Can't run `" . &printcmd (@cmd) . 
"'"; push @pids, [$pid, @cmd]; } else { open *PSIN, "<&FIN" or die "Can't dup: $!"; } +open *IN, "<&PSIN" or die "Can't dup: $!"; # # Select, if necessary # -# TODO: preselection, during the conversion from pdf? if (defined $select) { @cmd = ('psselect', "-p$select"); push @cmd, '-q' if defined $quiet; - my $pid = open2 *PSSELECT, "<&PSIN", @cmd; + my $pid = open2 *OUT, "<&IN", @cmd; push @pids, [$pid, @cmd]; + open *IN, "<&OUT" or die "Can't dup: $!"; +} + + + +# +# Bounding box +# +my @bbox; +if (defined $crop) { + # Calculate the maximal bounding box + + unless (seek IN, 0, 1) { + # The input is not seekable: have to create a seekable auxiliary file + + my $auxfile = "$tmpdir/psresize-stdin-$$.ps"; + + open AUXFD, '>', "$auxfile" + or die "Can't write into `$auxfile': $!"; + push @auxfiles, $auxfile; + + # cat > $auxfile + while (<IN>) { + print AUXFD or die "Can't print: $!"; + } + close AUXFD or die "Can't close: $!"; + close IN or die "Can't close: $!"; + + open IN, '<', "$auxfile" or die "Can't read `$auxfile': $!"; + } + + # Need to duplicate IN, since it will be closed in the parent process + open *GSIN, '<&IN'; + + @cmd = ('gs', '-sDEVICE=bbox', '-dBATCH', '-dNOPAUSE', '-'); + my $pid = open3 "<&GSIN", ">&GSOUT", *GSOUT, @cmd; + + my ($p,$c) = (0,0); # Page & character counter + my ($x0, $y0, $x1, $y1) = (1<<16, 1<<16, -(1<<16), -(1<<16)); + while (<GSOUT>) { + if ($_ =~ m/^\%\%BoundingBox: (\d+) (\d+) (\d+) (\d+)/) { + $x0 = $1 if $1 < $x0; + $y0 = $2 if $2 < $y0; + $x1 = $3 if $3 > $x1; + $y1 = $4 if $4 > $y1; + unless (defined $quiet) { + my $s = "[" . ++$p . "] "; + $c += length $s; + if ($c >= 80) { + print LOG "\n" or die "Can't close: $!"; + $c = length $s; + } + print LOG $s or die "Can't close: $!"; + } + } + } + close GSOUT or die "Can't close: $!";; + print LOG "\n" or die "Can't close: $!" unless defined $quiet; + + # No zombie processes + waitpid $pid, 0; + die "Can't run `" . &printcmd (@cmd) . "'" if $? 
>> 8; + + die "Error when calculating bounding box" if ($x0 >= $x1 || $y0 >= $y1); + @bbox = ($x0, $y0, $x1, $y1); + + # Let's go back to the beginning of the input + seek IN, 0, 0 or die "$!"; + +} elsif (defined $inwidth and defined $inheight) { + @bbox = (0, 0, $inwidth, $inheight); } else { - *PSSELECT = *PSIN; + # Guess page size from the input file + + # To avoid to seek into IN, it gonna be copied from WRITE to READ + # in background, once the Bounding Box has been read + pipe *READ, *WRITE or die "Can't pipe: $!"; + + while (not (@bbox) && defined (my $l = <IN>)) { + print WRITE $l or die "Can't close: $!"; + @bbox = ($1, $2, $3, $4) + if ($l =~ m/^\%\%BoundingBox: (\d+) (\d+) (\d+) (\d+)/); + } + + die "Cannot guess input page size!" unless @bbox; + + unless (my $pid = fork) { + # Child: cat IN > WRITE in background + die "Can't fork: $!" unless defined $pid; + close READ or die "Can't close: $!";; + + while (<IN>) { + print WRITE or die "Can't close: $!"; + } + exit; + } + # Parent + + close WRITE or die "Can't close: $!"; + close IN or die "Can't close: $!"; + + open *IN, "<&READ" or die "Can't dup: $!"; } # -# Resize file to our paper +# Calculate PStoPS specification # -@cmd = ('psresize2.pl', "-p$paper", "-m$mresize"); -push @cmd, "-c" if defined $crop; -push @cmd, '-q' if defined $quiet; +if (($outwidth > $bbox[3] - $bbox[1]) xor ($bbox[2] - $bbox[0] > $outheight)) { + ($outwidth, $outheight) = ($outheight, $outwidth); +} +my ($x0,$x1) = &calculate_coordinates($outwidth , $margin); +my ($y0,$y1) = &calculate_coordinates($outheight, $margin); -my $pid = open2 *PSRESIZE, "<&PSSELECT", @cmd; -push @pids, [$pid, @cmd]; -# Note: open2 closes the filehandles for us :) +my $rotation; +my $spec = 0 . 
&calc_pstops_page(@bbox, $x0, $y0, $x1, $y1); + + +# +# Run the program and filter the output +# +@cmd = ('pstops', "-w$outwidth", "-h$outheight", "$spec"); +push @cmd, '-q' if defined $quiet; +my $pid = open3 "<&IN", *OUT, ">&LOG", @cmd; +open *IN, "<&OUT" or die "Can't dup: $!"; @@ -368,10 +560,9 @@ if (defined $book) { @cmd = ('psbook'); push @cmd, '-q' if defined $quiet; - my $pid = open2 *PSBOOK, "<&PSRESIZE", @cmd; + my $pid = open2 *OUT, "<&IN", @cmd; + open *IN, "<&OUT" or die "Can't dup: $!"; push @pids, [$pid, @cmd]; -} else { - *PSBOOK = *PSRESIZE; } @@ -379,33 +570,33 @@ if (defined $book) { # # PSNup # -if ($nup > 1) { - @cmd = ('psnup', "-p$paper", "-m$mnup", "-$nup"); +if (1<<$nup > 1) { + my ($inwidth, $inheight) = ($outwidth, $outheight); + if ($nup % 2) { + ($outwidth, $outheight) = ($outheight, $outwidth); + } + $nup = 1<<$nup; + @cmd = ('psnup', "-W$inwidth", "-H$inheight", + "-w$outwidth", "-h$outheight", + "-m$mnup", "-$nup"); push @cmd, '-q' if defined $quiet; - my $pid = open2 *PSOUT, "<&PSBOOK", @cmd; + my $pid = open2 *OUT, "<&IN", @cmd; + open *IN, "<&OUT" or die "Can't dup: $!"; push @pids, [$pid, @cmd]; -} else { - *PSOUT = *PSBOOK; } # -# Final file +# Final file: Convert back to PDF # -if ($filetype eq "PDF") { - # Convert back to PDF - - @cmd = ('ps2pdf', "-dEmbedAllFonts=true", "-sPAPERSIZE=$paper", '-', '-'); - $pid = open2 ">&FOUT", "<&PSOUT", @cmd; - push @pids, [$pid, @cmd]; -} else { - # cat > FOUT - while (<PSOUT>) { - print FOUT or die "Can't print: $!"; - } -} +@cmd = ('gs', "-sDEVICE=pdfwrite", "-sOutputFile=%stdout%", "-dBATCH", + "-dNOPAUSE", "-dAutoRotatePages=/None", + "-c", "<< /Orientation 0 /PageSize [$outwidth $outheight] >> setpagedevice", + "-f", "-"); +$pid = open2 ">&FOUT", "<&IN", @cmd; +push @pids, [$pid, @cmd]; # Avoid zombies @@ -425,10 +616,86 @@ unlink @auxfiles; +# Useless, but Perl doesn't see that this filehandle is used more than +# one time (and even automatically closed by `open3') +exit; +close 
GSIN; + + # ========================================================= # +# Calculate an item of the pstops specification +# +sub calc_pstops_page { + my ($fx0, $fy0, $fx1, $fy1, + $tx0, $ty0, $tx1, $ty1) = @_; + + # From and to width / height + my ($wf, $hf) = ($fx1 - $fx0, $fy1 - $fy0); + my ($wt, $ht) = ($tx1 - $tx0, $ty1 - $ty0); + + # Check if rotation required (in our case, should always be 0) + my $rotation = (($wf > $hf) xor ($wt > $ht)); + + # Scale factor width / height + my ($sw, $sh); + if ($rotation) { + ($sw, $sh) = ($ht / $wf, $wt / $hf); + } else { + ($sw, $sh) = ($wt / $wf, $ht / $hf); + } + + # We take the smallest scale + my $scale = ($sw > $sh) ? $sh : $sw; + + # Calculate the centers of the boxes + my ($cxf, $cyf) = ( .5 * ($fx0 + $fx1), .5 * ($fy0 + $fy1) ); + my ($cxt, $cyt) = ( .5 * ($tx0 + $tx1), .5 * ($ty0 + $ty1) ); + + # First, PStoPs scales, then rotates, then moves + ($cxf, $cyf) = ($cxf * $scale, $cyf * $scale); + if ($rotation) { + if ($rotdir eq 'L') { + ($cxf, $cyf) = (-$cyf, $cxf); + } else { + ($cxf, $cyf) = ($cyf, -$cxf); + } + } else { + $rotdir = ''; + } + my ($movex, $movey) = ($cxt - $cxf, $cyt - $cyf); + + # Generate the summary + return sprintf( '%s@%.3f(%.3f,%.3f)', $rotdir, $scale, $movex, $movey); +} + + + +# +# Calculate the begining and ending coordinates, after shaving 2 times +# the margin +# +sub calculate_coordinates { + my ($length, $margin) = @_; + my $skip = $length - $margin; + my $outwidth = $skip - $margin; + return ( &round( &round($skip) - $outwidth ), &round($skip) ); +} + + + +# +# Round a float number +# +sub round { + return floor ($_[0] + .5); +} + + + +# # In-place convert the given length to PostScript points # sub topoints { @@ -453,6 +720,49 @@ sub topoints { } + +# +# In-place set the given width and height to the predefined papersize +# +sub papersize { + my ($p,$w,$h) = @_; + $p = lc $p; + + if ($p eq "a0") { + ($$w,$$h) = ("841mm", "1189mm"); + } elsif ($p eq "a1") { + ($$w,$$h) = ("594mm", 
"841mm"); + } elsif ($p eq "a2") { + ($$w,$$h) = ("420mm", "594mm"); + } elsif ($p eq "a3") { + ($$w,$$h) = ("297mm", "420mm"); + } elsif ($p eq "a4") { + ($$w,$$h) = ("210mm", "297mm"); + } elsif ($p eq "a5") { + ($$w,$$h) = ("148mm", "210mm"); + } elsif ($p eq "letter") { + ($$w,$$h) = ("8.5in", "11in"); + } elsif ($p eq "legal") { + ($$w,$$h) = ("8.5in", "14in"); + } elsif ($p eq "tabloid") { + ($$w,$$h) = ("11in", "17in"); + } elsif ($p eq "statement") { + ($$w,$$h) = ("5.5in", "8.5in"); + } elsif ($p eq "executive") { + ($$w,$$h) = ("7.25in", "10.5in"); + } elsif ($p eq "folio") { + ($$w,$$h) = ("8.27in", "13in"); + } elsif ($p eq "quarto") { + ($$w,$$h) = ("9in", "11in"); + } elsif ($p eq "10x14") { + ($$w,$$h) = ("10in", "14in"); + } else { + die "Unknown paper size: `$p'"; + } +} + + + # # Print a command just like you'd do in a shell # @@ -468,3 +778,4 @@ sub printcmd { join ' ', @cmd; } + |