diff options
| -rwxr-xr-x | pdftool.pl | 409 | 
1 files changed, 360 insertions, 49 deletions
| @@ -5,6 +5,7 @@ use Getopt::Long qw(:config posix_default no_ignore_case gnu_compat                              bundling auto_version auto_help);   use Pod::Usage;  use IPC::Open2; +use IPC::Open3;  use POSIX qw(floor);  use strict; @@ -15,8 +16,9 @@ pdftool.pl - a PDF swiss army knife  =head1 SYNOPSIS -B<pdftool.pl> [-s I<pages>] [-p I<paper>] [-m I<margin>] [-c] [-b] -[-n I<nup>] [-q] [I<infile> [I<outfile>]] +B<pdftool.pl> [-s I<pages>] [-w I<width> ] [-h I<heigth>] [-p I<paper>] +[-W I<width>] [-H I<heigth>] [-P I<paper>] [-p I<paper>] [-m I<margin>] +[-c] [-b] [-n I<nup>] [-q] [I<infile> [I<outfile>]]  =head1 DESCRIPTION @@ -70,13 +72,40 @@ The prefix character `_' indicates that the page number is relative  to the end of the document, counting backwards. If just this  character with no page number is used, a blank page will be inserted. +=item B<-w, --width> + +Specify the width of the output file. If the height is not specified as +well, it will be ignored. The known units are B<pt>, B<in>, B<cm> and +B<mm>. The default unit is B<pt>. + +=item B<-h, --height> + +Specify the height of the output file. If the width is not specified as +well, it will be ignored. The known units are B<pt>, B<in>, B<cm> and +B<mm>. The default unit is B<pt>. +  =item B<-p, --paper> -Specify the paper size of the output file. -Can be set to B<a0>, B<a1>, B<a2>, B<a3>, B<a4>, B<a5>, B<b5>, +Specify the paper size of the output file, as an alternative to B<-w> +and B<-h>. Can be set to B<a0>, B<a1>, B<a2>, B<a3>, B<a4>, B<a5>, B<b5>,  B<letter>, B<legal>, B<tabloid>, B<statement>, B<executive>, B<folio>,  B<quarto>, or B<10x14>. The  default output paper size is B<a4>. +=item B<-W, --Width> + +Same as the option B<-w>, but for the input file. + +=item B<-H, --Height> + +Same as the option B<-h>, but for the input file. + +=item B<-P, --Paper> + +Same as the option B<-p>, but for the input file. By default, +I<PDFTool> will try to guess this value from the header of the file, +and fail if the information is missing. This option is useless if the +crop option (B<-c>) is set. +  =item B<-m, --margin>  Add a margin to the output file. Possible units are B<pt>, B<in>, B<cm> @@ -102,9 +131,9 @@ Rearrange pages for printing books or booklets.  =item B<-n, --nup> -Puts multiple logical pages onto each physical sheet of paper. The inner -margin might be same as the outer one (depending on the booklet option -B<-b>), see B<-m> for details. +Puts multiple logical pages (has to be a power of two) onto each physical +sheet of paper. The inner margin might be same as the outer one (depending +on the booklet option B<-b>), see B<-m> for details.  If I<nup> is less than 10, the option B<->I<nup> may be used as an  alternative. @@ -148,12 +177,12 @@ Public domain, (c) Guilhem Moulin.  =head1 VERSION -Version: 0.1, 25 September 2010 +Version: 0.2, 12 December 2010  =cut  # TODO: inline it in the header -$main::VERSION = "0.1, 25 September 2010"; +$main::VERSION = "0.2, 12 December 2010"; @@ -164,17 +193,23 @@ my $tmpdir   = '/tmp';  #  my $select; -my $paper; +my ($outwidth,$outheight, $inwidth,$inheight);  my $margin;  my $crop;  my $book;  my $nup = 1; +my $rotdir = 'L';  my $quiet;  my $man;  # TODO: choose the output type  GetOptions( "select|s=s" => \$select, -            "paper|p=s"  => \$paper, +            "w|width=s"  => \$outwidth, +            "h|height=s" => \$outheight, +            "p|paper=s"  => sub { &papersize ($_[1],\$outwidth,\$outheight) }, +            "W|Width=s"  => \$inwidth, +            "H|Height=s" => \$inheight, +            "P|Paper=s"  => sub { &papersize ($_[1],\$inwidth,\$inheight) },              "margin|m=s" => \$margin,              "crop|c"     => \$crop,              "book|b"     => \$book, @@ -204,18 +239,20 @@ my ($infile, $outfile) = @ARGV;  # Default values  # -# Default output papersize -$paper = "a4"  unless  defined $paper; - -  # Default margin  unless (defined $margin) {      $margin = 0;      $margin = "1cm" if defined $crop;  } +# Default output papersize +&papersize ("a4", \$outwidth, \$outheight) +    unless  (defined $outwidth  and  defined $outheight); +  # Default unit: PostScript point -&topoints( \$margin ); +map {&topoints ($_)} ( \$outwidth, \$outheight, +                       \$inwidth, \$inheight, +                       \$margin );  # Inner and outer margins  my ($mresize, $mnup) = (0,0); @@ -226,6 +263,12 @@ if ($nup > 1 && not defined $book) {      $mresize = $margin;  } +# TODO: would be nice to generalize $nup to any integer that psnup would +# accept. +my $i = -1; +while (1<<++$i < $nup) {}; +die "nup has to be a power of two"  if  1<<$i > $nup; +$nup = $i;  # Open input and output files @@ -248,6 +291,8 @@ if (defined $outfile && $outfile ne "-") {      *FOUT = *STDOUT;  } +*LOG = *STDERR; +  # @@ -319,45 +364,192 @@ if ($filetype eq "PDF") {          }          close FINAUX;      } + +    my ($first, $last); +    if (defined $select) { +        # Convert to PS only the pages we are interested in +        ($first, $last) = (1<<16,-(1<<16)); +        for (split / *, */, $select) { +            my ($rmin, $rmax) = split (/ *- */, $_); +            undef $first if defined $rmin && not $rmin; +            undef $last if defined $rmax && not $rmax; +            if (defined $rmin && $rmin) { +                $first = $rmin if defined $first && $rmin < $first;  +                $last = $rmin if defined $last && $rmin > $last;  +            } +            if (defined $rmax && $rmax) { +                $first = $rmax if defined $first && $rmax < $first;  +                $last = $rmax if defined $last && $rmax > $last;  +            } +        } + +        # Calculate the new page range +        my @newselect; +        for (split / *, */, $select) { +            my ($rmin, $rmax) = split / *- */, $_; +            $rmin -= $first-1 if defined $first && defined $rmin && $rmin; +            $rmax -= $first-1 if defined $first && defined $rmax && $rmax; +            my $r = ""; +            $r .= $rmin if defined $rmin; +            $r .= "-"; +            $r .= $rmax if defined $rmax; +            print "$r\n"; +            push @newselect, $r; +        } +         +        $select = join ',', @newselect; +    }      # Convert to PS -    @cmd = ('pdftops', "$infile", '-'); +    @cmd = ('pdftops', '-origpagesizes', "$infile", '-'); +    push @cmd, '-f', $first if defined $first;  +    push @cmd, '-l', $last if defined $last;       push @cmd, '-q'  if  defined $quiet; -    my $pid = open PSIN, "-|", @cmd +    my $pid = open *PSIN, "-|", @cmd                    or die "Can't run `" . &printcmd (@cmd) . "'";      push @pids, [$pid, @cmd];  } else {      open *PSIN, "<&FIN"  or  die "Can't dup: $!";  } +open *IN, "<&PSIN"  or  die "Can't dup: $!";  #  # Select, if necessary  # -# TODO: preselection, during the conversion from pdf?  if (defined $select) {      @cmd = ('psselect', "-p$select");      push @cmd, '-q'  if  defined $quiet; -    my $pid = open2 *PSSELECT, "<&PSIN", @cmd; +    my $pid = open2 *OUT, "<&IN", @cmd;      push @pids, [$pid, @cmd]; +    open *IN, "<&OUT"  or  die "Can't dup: $!"; +}  + + + +# +# Bounding box +# +my @bbox; +if (defined $crop) { +    # Calculate the maximal bounding box +     +    unless (seek IN, 0, 1) { +        # The input is not seekable: have to create a seekable auxiliary file + +        my $auxfile = "$tmpdir/psresize-stdin-$$.ps"; + +        open AUXFD, '>', "$auxfile" +            or die "Can't write into `$auxfile': $!"; +        push @auxfiles, $auxfile; + +        # cat > $auxfile +        while (<IN>) { +            print AUXFD or  die "Can't print: $!"; +        } +        close AUXFD  or  die "Can't close: $!"; +        close IN  or  die "Can't close: $!"; + +        open IN, '<', "$auxfile"  or  die "Can't read `$auxfile': $!"; +    } +  +    # Need to duplicate IN, since it will be closed in the parent process +    open *GSIN, '<&IN'; + +    @cmd = ('gs', '-sDEVICE=bbox', '-dBATCH', '-dNOPAUSE', '-'); +    my $pid = open3 "<&GSIN", ">&GSOUT", *GSOUT, @cmd; +     +    my ($p,$c) = (0,0); # Page & character counter +    my ($x0, $y0, $x1, $y1) = (1<<16, 1<<16, -(1<<16), -(1<<16)); +    while (<GSOUT>) { +        if ($_ =~ m/^\%\%BoundingBox: (\d+) (\d+) (\d+) (\d+)/) { +            $x0 = $1 if $1 < $x0; +            $y0 = $2 if $2 < $y0; +            $x1 = $3 if $3 > $x1; +            $y1 = $4 if $4 > $y1; +            unless (defined $quiet) { +                my $s = "[" . ++$p . "] "; +                $c += length $s; +                if ($c >= 80) { +                    print LOG "\n"  or  die "Can't close: $!"; +                    $c = length $s; +                } +                print LOG $s  or  die "Can't close: $!"; +            } +        } +    } +    close GSOUT  or  die "Can't close: $!";; +    print LOG "\n"  or  die "Can't close: $!"  unless  defined $quiet; +     +    # No zombie processes +    waitpid $pid, 0; +    die "Can't run `" . &printcmd (@cmd) . "'"  if  $? >> 8; +     +    die "Error when calculating bounding box" if ($x0 >= $x1 || $y0 >= $y1); +    @bbox = ($x0, $y0, $x1, $y1); + +    # Let's go back to the beginning of the input +    seek IN, 0, 0  or  die "$!"; + +} elsif (defined $inwidth and defined $inheight) { +    @bbox = (0, 0, $inwidth, $inheight);  } else { -    *PSSELECT = *PSIN; +    # Guess page size from the input file +    +    # To avoid to seek into IN, it gonna be copied from WRITE to READ +    # in background, once the Bounding Box has been read +    pipe *READ, *WRITE  or  die "Can't pipe: $!"; + +    while (not (@bbox) && defined (my $l = <IN>)) { +        print WRITE $l  or  die "Can't close: $!"; +        @bbox = ($1, $2, $3, $4) +            if ($l =~ m/^\%\%BoundingBox: (\d+) (\d+) (\d+) (\d+)/); +    } +    +    die "Cannot guess input page size!"  unless  @bbox; + +    unless (my $pid = fork) { +        # Child: cat IN > WRITE in background +        die "Can't fork: $!" unless defined $pid; +        close READ  or  die "Can't close: $!";; +         +        while (<IN>) { +            print WRITE  or  die "Can't close: $!"; +        } +        exit; +    } +    # Parent + +    close WRITE  or  die "Can't close: $!"; +    close IN  or  die "Can't close: $!"; +         +    open *IN, "<&READ"  or  die "Can't dup: $!";  }  # -# Resize file to our paper +# Calculate PStoPS specification  # -@cmd = ('psresize2.pl', "-p$paper", "-m$mresize"); -push @cmd, "-c" if defined $crop; -push @cmd, '-q'  if  defined $quiet; +if (($outwidth > $bbox[3] - $bbox[1]) xor ($bbox[2] - $bbox[0] > $outheight)) { +    ($outwidth, $outheight) = ($outheight, $outwidth); +} +my ($x0,$x1) = &calculate_coordinates($outwidth , $margin); +my ($y0,$y1) = &calculate_coordinates($outheight, $margin); -my $pid = open2 *PSRESIZE, "<&PSSELECT", @cmd; -push @pids, [$pid, @cmd]; -# Note: open2 closes the filehandles for us :) +my $rotation; +my $spec = 0 . &calc_pstops_page(@bbox, $x0, $y0, $x1, $y1); + + +# +# Run the program and filter the output +# +@cmd = ('pstops', "-w$outwidth", "-h$outheight", "$spec"); +push @cmd, '-q'  if  defined $quiet; +my $pid = open3 "<&IN", *OUT, ">&LOG", @cmd; +open *IN, "<&OUT"  or  die "Can't dup: $!"; @@ -368,10 +560,9 @@ if (defined $book) {      @cmd = ('psbook');      push @cmd, '-q'  if  defined $quiet; -    my $pid = open2 *PSBOOK, "<&PSRESIZE", @cmd; +    my $pid = open2 *OUT, "<&IN", @cmd; +    open *IN, "<&OUT"  or  die "Can't dup: $!";      push @pids, [$pid, @cmd]; -} else { -    *PSBOOK = *PSRESIZE;  } @@ -379,33 +570,33 @@ if (defined $book) {  #  # PSNup  #  -if ($nup > 1) { -    @cmd = ('psnup', "-p$paper", "-m$mnup", "-$nup"); +if (1<<$nup > 1) { +    my ($inwidth, $inheight) = ($outwidth, $outheight); +    if ($nup % 2) { +        ($outwidth, $outheight) = ($outheight, $outwidth); +    } +    $nup = 1<<$nup; +    @cmd = ('psnup', "-W$inwidth", "-H$inheight", +                     "-w$outwidth", "-h$outheight", +                     "-m$mnup", "-$nup");      push @cmd, '-q'  if  defined $quiet; -    my $pid = open2 *PSOUT, "<&PSBOOK", @cmd; +    my $pid = open2 *OUT, "<&IN", @cmd; +    open *IN, "<&OUT"  or  die "Can't dup: $!";      push @pids, [$pid, @cmd]; -} else { -    *PSOUT = *PSBOOK;  }  # -# Final file +# Final file: Convert back to PDF  # -if ($filetype eq "PDF") { -    # Convert back to PDF - -    @cmd = ('ps2pdf', "-dEmbedAllFonts=true", "-sPAPERSIZE=$paper", '-', '-'); -    $pid = open2 ">&FOUT", "<&PSOUT", @cmd; -    push @pids, [$pid, @cmd]; -} else { -    # cat > FOUT -    while (<PSOUT>) { -        print FOUT  or  die "Can't print: $!"; -    } -} +@cmd = ('gs', "-sDEVICE=pdfwrite", "-sOutputFile=%stdout%", "-dBATCH", +              "-dNOPAUSE", "-dAutoRotatePages=/None", +              "-c", "<< /Orientation 0 /PageSize [$outwidth $outheight] >> setpagedevice", +              "-f", "-"); +$pid = open2 ">&FOUT", "<&IN", @cmd; +push @pids, [$pid, @cmd];  # Avoid zombies @@ -425,10 +616,86 @@ unlink @auxfiles; +# Useless, but Perl doesn't see that this filehandle is used more than +# one time (and even automatically closed by `open3') +exit; +close GSIN; + +  # =========================================================  # +# Calculate an item of the pstops specification +# +sub calc_pstops_page { +    my ($fx0, $fy0, $fx1, $fy1, +        $tx0, $ty0, $tx1, $ty1) = @_; + +    # From and to width / height +    my ($wf, $hf) = ($fx1 - $fx0,  $fy1 - $fy0); +    my ($wt, $ht) = ($tx1 - $tx0,  $ty1 - $ty0); + +    # Check if rotation required (in our case, should always be 0) +    my $rotation = (($wf > $hf) xor ($wt > $ht)); + +    # Scale factor width / height +    my ($sw, $sh); +    if ($rotation) { +        ($sw, $sh) = ($ht / $wf,  $wt / $hf); +    } else { +        ($sw, $sh) = ($wt / $wf,  $ht / $hf); +    } + +    # We take the smallest scale +    my $scale = ($sw > $sh) ? $sh : $sw; +     +    # Calculate the centers of the boxes +    my ($cxf, $cyf) = ( .5 * ($fx0 + $fx1),  .5 * ($fy0 + $fy1) ); +    my ($cxt, $cyt) = ( .5 * ($tx0 + $tx1),  .5 * ($ty0 + $ty1) ); + +    # First, PStoPs scales, then rotates, then moves +    ($cxf, $cyf) = ($cxf * $scale, $cyf * $scale); +    if ($rotation) { +        if ($rotdir eq 'L') { +            ($cxf, $cyf) = (-$cyf, $cxf); +        } else { +            ($cxf, $cyf) = ($cyf, -$cxf); +        } +    } else { +        $rotdir = ''; +    } +    my ($movex, $movey) = ($cxt - $cxf,  $cyt - $cyf); + +    # Generate the summary +    return sprintf( '%s@%.3f(%.3f,%.3f)', $rotdir, $scale, $movex, $movey); +} + + + +# +# Calculate the begining and ending coordinates, after shaving 2 times +# the margin +# +sub calculate_coordinates { +    my ($length, $margin) = @_; +    my $skip     = $length - $margin; +    my $outwidth = $skip - $margin; +    return ( &round( &round($skip) - $outwidth ), &round($skip) ); +} + + + +# +# Round a float number +#  +sub round { +    return floor ($_[0] + .5); +} + + + +#  # In-place convert the given length to PostScript points  #  sub topoints { @@ -453,6 +720,49 @@ sub topoints {  } + +# +# In-place set the given width and height to the predefined papersize +# +sub papersize { +    my ($p,$w,$h) = @_; +    $p = lc $p; + +    if ($p eq "a0") { +        ($$w,$$h) = ("841mm",  "1189mm"); +    } elsif ($p eq "a1") { +        ($$w,$$h) = ("594mm",  "841mm"); +    } elsif ($p eq "a2") { +        ($$w,$$h) = ("420mm",  "594mm"); +    } elsif ($p eq "a3") { +        ($$w,$$h) = ("297mm",  "420mm"); +    } elsif ($p eq "a4") { +        ($$w,$$h) = ("210mm",  "297mm"); +    } elsif ($p eq "a5") { +        ($$w,$$h) = ("148mm",  "210mm"); +    } elsif ($p eq "letter") { +        ($$w,$$h) = ("8.5in",  "11in"); +    } elsif ($p eq "legal") { +        ($$w,$$h) = ("8.5in",  "14in"); +    } elsif ($p eq "tabloid") { +        ($$w,$$h) = ("11in",   "17in"); +    } elsif ($p eq "statement") { +        ($$w,$$h) = ("5.5in",  "8.5in"); +    } elsif ($p eq "executive") { +        ($$w,$$h) = ("7.25in", "10.5in"); +    } elsif ($p eq "folio") { +        ($$w,$$h) = ("8.27in", "13in"); +    } elsif ($p eq "quarto") { +        ($$w,$$h) = ("9in",    "11in"); +    } elsif ($p eq "10x14") { +        ($$w,$$h) = ("10in",   "14in"); +    } else { +        die "Unknown paper size: `$p'"; +    } +} + + +  #  # Print a command just like you'd do in a shell  # @@ -468,3 +778,4 @@ sub printcmd {      join ' ', @cmd;   } + | 
