diff options
-rwxr-xr-x | pdftool.pl | 337 |
1 files changed, 172 insertions, 165 deletions
@@ -12,7 +12,6 @@ $VERSION = "0.4, 12 May 2011"; use Getopt::Long qw(:config posix_default no_ignore_case gnu_compat bundling auto_version auto_help); use Pod::Usage; -use IPC::Open2; use IPC::Open3; use POSIX qw(floor); use Error qw(:try); @@ -28,7 +27,7 @@ pdftool.pl - a PDF swiss army knife B<pdftool.pl> [B<-w> I<width> ] [B<-h> I<heigth>] [B<-p> I<paper>] [B<-W> I<width>] [B<-H> I<heigth>] [B<-P> I<paper>] [B<-s> I<pages>] [B<-m> I<margin>] [B<-b> I<border>] [B<-c>] [B<--book>] [B<--column>] -[B<-n> I<nup>] [B<--screen>] [B<-q>] [I<infile> [I<outfile>]] +[B<-n> I<num>] [B<--screen>] [B<-q>] [I<infile> [I<outfile>]] =head1 DESCRIPTION @@ -36,7 +35,7 @@ B<PDFTool> combines the tools in the PSUtils bundle in a nice way. The input should be either a Portable Document Format (PDF) file, or a PostScript file. The output format is PDF only. -If no input file is given, or if a single hyphen-minus (B<->) is given as +If no input file is given, or if a single hyphen-minus (I<->) is given as file name, B<PDFTool> will read the PDF or PostScript data from the standard input. In that case, and if the input data is in PDF format, an auxiliary file will be created (since the conversion from PDF to PS @@ -44,7 +43,7 @@ requires random access to the data), and removed afterwards. Also, if the crop option (B<-c>) is set, an auxiliary file will be created, and removed afterwards. -If no output file is given, or if a single hyphen-minus (B<->) is given as +If no output file is given, or if a single hyphen-minus (I<->) is given as file name, B<PDFTool> will send the data to the standard output. By default, B<PDFTool> rotates the pages in order to ensure that your pdf will @@ -303,69 +302,25 @@ die "Bad nup: `$nup'" if # # Open input and output files # -my $infile_display; +my ($FIN, $FOUT); if (defined $infile && $infile ne "-") { - open FIN, '<', "$infile" or die "Can't read `$infile': $!"; - $infile_display = $infile; + open $FIN, '<', $infile or die "Can't read `$infile': $!"; } else { undef $infile; - *FIN = *STDIN; - $infile_display = "(stdin)"; + $FIN = *STDIN; } -# After the pipe, it won't be detected as seekable -my $inseek = (seek FIN, 0, 1) ? 1 : undef; if (defined $outfile && $outfile ne "-") { - open FOUT, '>', "$outfile" or die "Can't create `$outfile': $!"; + open $FOUT, '>', "$outfile" or die "Can't create `$outfile': $!"; } else { - *FOUT = *STDOUT; + $FOUT = *STDOUT; } *LOG = *STDERR; -# -# Detect filetype -# -# To avoid to seek into FIN, it gonna be copied from WRITE to READ in -# the background, once the filetype has been read - -# TODO: read specifications, to properly detect the filetype -my $filetype; -pipe *READ, *WRITE or die "Can't pipe: $!"; -while (not (defined $filetype) && defined (my $l = <FIN>)) { - print WRITE $l or die "Can't close: $!"; - - if (defined $l && $l =~ /^%!PS/) { - $filetype = "PS"; - } elsif (defined $l && $l =~ /^%PDF/) { - $filetype = "PDF"; - } -} - -die "Cannot recognise FileType" unless defined $filetype; - -unless (my $pid = fork) { - # Child: cat FIN > WRITE in background - die "Can't fork: $!" unless defined $pid; - close READ or die "Can't close: $!"; - - while (<FIN>) { - print WRITE or die "Can't print: $!"; - } - exit; -} -# Parent - -close WRITE or die "Can't close: $!"; -close FIN or die "Can't close: $!"; - -open *FIN, "<&READ" or die "Can't dup: $!"; - - - # Auxiliary files, to remove my @auxfiles; @@ -376,17 +331,17 @@ my @pids; my $return = 0; try { - &pdftops(); + my $FD = &pdftops($FIN); - &psselect() if defined $select; + $FD = &psselect($FD) if defined $select; - my @bbox = &psbbox(); + my ($FD2, @bbox) = &psbbox($FD); - &psbook() if defined $book; + $FD2 = &psbook($FD2) if defined $book; - my ($landscape,$rotate) = &psnup (@bbox); + my ($FD3, $landscape, $rotate) = &psnup ($FD2, @bbox); - &pstopdf ($landscape,$rotate); + &pstopdf ($FD3, $FOUT, $landscape, $rotate); } catch Error with { @@ -411,7 +366,7 @@ finally { # Close opened file handles map { close $_ or die "Can't close: $!" } - ( *READ, *FIN, *FOUT ); + ( $FIN, $FOUT ); # Delete auxiliary files unlink @auxfiles; @@ -420,8 +375,9 @@ finally { }; # Useless, but Perl doesn't see that this filehandle is used more than -# one time (and even automatically closed by `open3') -close GSIN; +# one time +close IN; # automatically closed by `open3' +close OUT; @@ -431,138 +387,184 @@ close GSIN; # # Conversion from PDF to PS, if necessary -# Reads from FIN, writes to IN # sub pdftops { - if ($filetype eq "PDF") { - unless (defined $infile && $inseek) { - # Need to copy the whole input to an auxiliary file, since - # conversion from PDF to PS requires random access to the data + my $IN = $_[0]; + my $OUT; + + # + # Detect filetype + # + # To avoid to seek into IN, it gonna be copied from WRITE to READ in + # the background, once the filetype has been read + # + # TODO: read specifications, to properly detect the filetype + + my $filetype; + my ($READ, $WRITE); + pipe $READ, $WRITE or die "Can't pipe: $!"; + + while (not (defined $filetype) && defined (my $l = <$IN>)) { + print $WRITE ($l) or die "Can't close: $!"; + + if (defined $l && $l =~ /^%!PS/) { + $filetype = "PS"; + } elsif (defined $l && $l =~ /^%PDF/) { + $filetype = "PDF"; + } + } + + die "Can't recognize the filetype" unless defined $filetype; + + unless (my $pid = fork) { + # Child: cat $IN > $WRITE in background + die "Can't fork: $!" unless defined $pid; + close $READ or die "Can't close: $!"; + + while (<$IN>) { + print $WRITE ($_) or die "Can't print: $!"; + } + exit; + } + + # Parent + close $WRITE or die "Can't close: $!"; - $infile = "$tmpdir/pdftool-stdin-$$." . lc $filetype; - open FINAUX, '>', $infile - or die "Can't write into `$infile': $!"; - push @auxfiles, $infile; + return $READ if $filetype eq "PS"; - # cat > $infile - while (<FIN>) { - print FINAUX or die "Can't print: $!"; - } - close FINAUX; + + # + # Conversion from PDF to PS + # + unless (defined $infile) { + # Need to copy the whole input to an auxiliary file, since + # conversion from PDF to PS requires random access to the data + + $infile = "$tmpdir/pdftool-stdin-$$." . lc $filetype; + + open my $AUX, '>', $infile + or die "Can't write into `$infile': $!"; + push @auxfiles, $infile; + + # cat > $infile + while (<$READ>) { + print $AUX ($_) or die "Can't print: $!"; } + close $AUX; + } - my ($first, $last); - # pdftops doesn't provide any way to have page numbers relative to - # the end of the document, hence there is no detection of the - # smallest interval if $select contains `_' - if (defined $select && not $select =~ /_/) { - # Convert to PS only the pages we are interested in - ($first, $last) = (1<<16,-(1<<16)); - for (split /,/, $select) { - $_ =~ /^(\d*)(-?)(\d*)$/; - my ($rmin,$sep,$rmax) = ($1,$2,$3); - undef $first if $sep && not $rmin; - undef $last if $sep && not $rmax; - if ($rmin) { - $first = $rmin if defined $first && $rmin < $first; - $last = $rmin if defined $last && $rmin > $last; - } - if ($rmax) { - $first = $rmax if defined $first && $rmax < $first; - $last = $rmax if defined $last && $rmax > $last; - } + my ($first, $last); + # pdftops doesn't provide any way to have page numbers relative to + # the end of the document, hence there is no detection of the + # smallest interval if $select contains `_' + if (defined $select && not $select =~ /_/) { + # Convert to PS only the pages we are interested in + ($first, $last) = (1<<16,-(1<<16)); + for (split /,/, $select) { + $_ =~ /^(\d*)(-?)(\d*)$/; + my ($rmin,$sep,$rmax) = ($1,$2,$3); + undef $first if $sep && not $rmin; + undef $last if $sep && not $rmax; + if ($rmin) { + $first = $rmin if defined $first && $rmin < $first; + $last = $rmin if defined $last && $rmin > $last; } - - # Calculate the new page range - my @newselect; - for (split /,/, $select) { - $_ =~ /^(\d*)(-?)(\d*)$/; - my ($rmin,$sep,$rmax) = ($1,$2,$3); - if (defined $first) { - $rmin -= $first-1 if $rmin; - $rmax -= $first-1 if $rmax; - } - push @newselect, "$rmin$sep$rmax"; + if ($rmax) { + $first = $rmax if defined $first && $rmax < $first; + $last = $rmax if defined $last && $rmax > $last; } - - $select = join ',', @newselect; } - # Convert to PS - my @cmd = ('pdftops', '-origpagesizes', "$infile", '-'); - push @cmd, '-f', $first if defined $first; - push @cmd, '-l', $last if defined $last; - push @cmd, '-q' if defined $quiet; - my $pid = open *PSIN, "-|", @cmd - or die "Can't run `" . &printcmd (@cmd) . "'"; - push @pids, [$pid, @cmd]; + # Calculate the new page range + my @newselect; + for (split /,/, $select) { + $_ =~ /^(\d*)(-?)(\d*)$/; + my ($rmin,$sep,$rmax) = ($1,$2,$3); + if (defined $first) { + $rmin -= $first-1 if $rmin; + $rmax -= $first-1 if $rmax; + } + push @newselect, "$rmin$sep$rmax"; + } - } else { - open *PSIN, "<&FIN" or die "Can't dup: $!"; + $select = join ',', @newselect; } - open *IN, "<&PSIN" or die "Can't dup: $!"; + # Convert to PS + my @cmd = ('pdftops', '-origpagesizes', $infile, '-'); + push @cmd, '-f', $first if defined $first; + push @cmd, '-l', $last if defined $last; + push @cmd, '-q' if defined $quiet; + + my $pid = open $OUT, "-|", @cmd + or die "Can't run `" . &printcmd (@cmd) . "'"; + push @pids, [$pid, @cmd]; + + return $OUT; } # # Select some pages in the document -# Reads/writes from/to IN # sub psselect { + my $IN = $_[0]; + my $OUT; + my @cmd = ('psselect', '-p', $select); push @cmd, '-q' if defined $quiet; - my $pid = open3 "<&IN", *OUT, ">&LOG", @cmd; + *IN = $IN; + my $pid = open3 '<&IN', $OUT, '>&LOG', @cmd; push @pids, [$pid, @cmd]; - open *IN, "<&OUT" or die "Can't dup: $!"; + return $OUT; } # # Detect / calculate the bounding box -# Reads/writes from/to IN # sub psbbox { - my @bbox; + my $IN = $_[0]; + my ($OUT, @bbox); if (defined $crop) { # Calculate the maximal bounding box - unless (seek IN, 0, 1) { + unless (seek $IN, 0, 1) { # The input is not seekable: have to create a seekable auxiliary # file my $auxfile = "$tmpdir/pdftool-stdin-$$.ps"; - open AUXFD, '>', "$auxfile" + open my $AUX, '>', $auxfile or die "Can't write into `$auxfile': $!"; push @auxfiles, $auxfile; # cat > $auxfile - while (<IN>) { - print AUXFD or die "Can't print: $!"; + while (<$IN>) { + print $AUX ($_) or die "Can't print: $!"; } - close AUXFD or die "Can't close: $!"; - close IN or die "Can't close: $!"; + close $AUX or die "Can't close: $!"; + close $IN or die "Can't close: $!"; - open IN, '<', "$auxfile" or die "Can't read `$auxfile': $!"; + open $IN, '<', $auxfile or die "Can't read `$auxfile': $!"; } # Need to duplicate IN, since it will be closed in the parent process - open *GSIN, '<&IN'; + open *IN, '<&=', $IN or die "Can't fdopen: $!"; my @cmd = ('gs', '-sDEVICE=bbox', '-dBATCH', '-dNOPAUSE', '-'); - my $pid = open3 "<&GSIN", ">&GSOUT", *GSOUT, @cmd; + my $pid = open3 "<&IN", ">&OUT", *OUT, @cmd; my ($p,$c) = (0,0); # Page & character counter my ($x0, $y0, $x1, $y1) = (1<<16, 1<<16, -(1<<16), -(1<<16)); - while (<GSOUT>) { + while (<OUT>) { if ($_ =~ m/^\%\%BoundingBox: (\d+) (\d+) (\d+) (\d+)/) { $x0 = $1 if $1 < $x0; $y0 = $2 if $2 < $y0; @@ -572,15 +574,15 @@ sub psbbox { my $s = "[" . ++$p . "] "; $c += length $s; if ($c >= 80) { - print LOG "\n" or die "Can't close: $!"; + print LOG "\n" or die "Can't print: $!"; $c = length $s; } - print LOG $s or die "Can't close: $!"; + print LOG $s or die "Can't print: $!"; } } } - close GSOUT or die "Can't close: $!"; - print LOG "\n" or die "Can't close: $!" unless defined $quiet; + close OUT or die "Can't close: $!"; + print LOG "\n" or die "Can't print: $!" unless defined $quiet; # No zombie processes waitpid $pid, 0; @@ -591,72 +593,77 @@ sub psbbox { @bbox = ($x0, $y0, $x1, $y1); # Let's go back to the beginning of the input - seek IN, 0, 0 or die "$!"; + seek $IN, 0, 0 or die "Can't seek: $!"; + $OUT = $IN; } elsif (defined $inwidth and defined $inheight) { @bbox = (0, 0, $inwidth, $inheight); + $OUT = $IN; } else { # Guess page size from the input file # To avoid to seek into IN, it gonna be copied from WRITE to READ # in background, once the Bounding Box has been read - pipe *READ, *WRITE or die "Can't pipe: $!"; + my ($READ, $WRITE); + pipe $READ, $WRITE or die "Can't pipe: $!"; - while (not (@bbox) && defined (my $l = <IN>)) { - print WRITE $l or die "Can't close: $!"; + while (not (@bbox) && defined (my $l = <$IN>)) { + print $WRITE ($l) or die "Can't print: $!"; @bbox = ($1, $2, $3, $4) if ($l =~ m/^\%\%BoundingBox: (\d+) (\d+) (\d+) (\d+)/); } - die "Cannot guess input page size!" unless @bbox; + die "Cannot guess input page size" unless @bbox; unless (my $pid = fork) { # Child: cat IN > WRITE in background die "Can't fork: $!" unless defined $pid; - close READ or die "Can't close: $!";; + close $READ or die "Can't close: $!";; - while (<IN>) { - print WRITE or die "Can't close: $!"; + while (<$IN>) { + print $WRITE ($_) or die "Can't print: $!"; } exit; } # Parent - close WRITE or die "Can't close: $!"; - close IN or die "Can't close: $!"; + close $WRITE or die "Can't close: $!"; - open *IN, "<&READ" or die "Can't dup: $!"; + $OUT = $READ; } - return @bbox; + + return ($OUT, @bbox); } # # PSBook -# Reads/writes from/to IN # sub psbook { + my $IN = $_[0]; + my $OUT; + my @cmd = ('psbook'); push @cmd, '-q' if defined $quiet; - my $pid = open3 "<&IN", *OUT, ">&LOG", @cmd; + *IN = $IN; + my $pid = open3 "<&IN", $OUT, ">&LOG", @cmd; push @pids, [$pid, @cmd]; - open *IN, "<&OUT" or die "Can't dup: $!"; + return $OUT; } # # PSNup (inlined here, to keep track of the possible rotation) -# Reads/writes from/to IN # sub psnup { - my @bbox = @_; + my ($IN, @bbox) = @_; - my ($landscape, $rotate); + my ($OUT, $landscape, $rotate); if ((($bbox[2]-$bbox[0] > $bbox[3]-$bbox[1]) and not ($outwidth-2*$margin > $outheight-2*$margin)) @@ -778,22 +785,20 @@ sub psnup { my @cmd = ('pstops', '-w', $bbox[2], '-h', $bbox[3], $pagespecs); push @cmd, '-q' if defined $quiet; - my $pid = open3 "<&IN", *OUT, ">&LOG", @cmd; + *IN = $IN; + my $pid = open3 "<&IN", $OUT, ">&LOG", @cmd; push @pids, [$pid, @cmd]; - open *IN, "<&OUT" or die "Can't dup: $!"; - - return ($landscape,$rotate); + return ($OUT, $landscape, $rotate); } # # Final file: Convert back to PDF -# Reads from IN, writes to FOUT # sub pstopdf { - my ($landscape,$rotate) = @_; + my ($IN, $OUT, $landscape, $rotate) = @_; my ($ow,$oh) = ($outwidth,$outheight); ($ow,$oh) = ($oh,$ow) if $rotate%2; @@ -811,7 +816,8 @@ sub pstopdf { "-c", "<< $pagedevice >> setpagedevice", "-f", "-"); - my $pid = open3 "<&IN", ">&FOUT", ">&LOG", @cmd; + (*IN,*OUT) = ($IN,$OUT); + my $pid = open3 "<&IN", ">&OUT", ">&LOG", @cmd; push @pids, [$pid, @cmd]; } @@ -898,7 +904,7 @@ sub printcmd { for (@_) { my $s = $_; $s =~ s/"/\\"/; - $s = "\"$s\"" if $s =~ /[ ()';#{}*?~&|`]/; + $s = "\"$s\"" if $s =~ /[ ()';#{}*?~&|`!]/; push @cmd, $s; } @@ -918,3 +924,4 @@ sub min { return $n if $n < $m; return $m; } + |