From 7944c09286c112f5d2880c4c4d3fdf9a4a86aa1a Mon Sep 17 00:00:00 2001 From: Guilhem Moulin Date: Wed, 7 Mar 2012 01:10:19 +0100 Subject: unicode, re --- videoadd.pl | 3 +- videodb-check.pl | 48 +++++++++++++++++++++++++------ videomv.pl | 86 +++++++++++++++++++++++++++++++++++++++++++++++--------- videorm.pl | 28 +++++++++++++++--- 4 files changed, 138 insertions(+), 27 deletions(-) diff --git a/videoadd.pl b/videoadd.pl index 8ad29df..ac77e61 100755 --- a/videoadd.pl +++ b/videoadd.pl @@ -64,7 +64,8 @@ B [B<-s>] [B<-u>] [B<--ignore-db>] [B<--dont-move>] [B<--force>] [B =head1 DISCLAIMER -Your collection is assumed to have the following structure: two +Your collection is assumed to be encoded in UTF-8, and to have the following +structure: two folders, I and I, that have the same parent. I contains one subdirectory for each director, and each movie lies (B) in the subdirectory of its director. diff --git a/videodb-check.pl b/videodb-check.pl index dae4cdb..b9fb4ca 100755 --- a/videodb-check.pl +++ b/videodb-check.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl -CAL +#!/usr/bin/perl -CADS # This program is free software. It comes without any warranty, to the # extent permitted by applicable law. You can redistribute it and/or @@ -10,6 +10,8 @@ $VERSION = "0.1, 27 November 2011"; use warnings; use strict; +use utf8; +use feature "unicode_strings"; use DBI; use Pod::Usage; @@ -44,7 +46,8 @@ B =head1 DISCLAIMER -Your collection is assumed to have the following structure: two +Your collection is assumed to be encoded in UTF-8, and to have the following +structure: two folders, I and I, that have the same parent. I contains one subdirectory for each director, and each movie lies (B) in the subdirectory of its director. @@ -161,6 +164,11 @@ my @links; # List of missing symlinks my @files; # List of symlinks that have a non existing/valid target my @dirs; # List of symlinks that target to a wrong director foreach (@$res) { + # The database is using UTF-8, but the UTF-8 flag may be missing; + # restore it. + Encode::_utf8_on($_->[0]); + Encode::_utf8_on($_->[1]); + my @ls = File::Spec->splitdir($_->[0]); my $l = File::Spec->catfile ($symlinks, $ls[0]); @@ -170,6 +178,7 @@ foreach (@$res) { } my @target = File::Spec->splitdir (readlink $l); + map {Encode::_utf8_on($_)} @target; push @files, $_->[0] unless ( $target[0] eq File::Spec->updir() and $target[1] eq "DIRECTORS" @@ -199,10 +208,14 @@ while (my $l = readdir(DIR)) { next if $l eq File::Spec->curdir(); next if $l eq File::Spec->updir(); + Encode::_utf8_on($l); my $f = File::Spec->catfile($symlinks, $l); - if ( -d Cwd::realpath($f) ) { - opendir (SUBDIR, Cwd::realpath($f)) - or die "Can't open dir `" .Cwd::realpath($f). ".:" .$!. "\n"; + Encode::_utf8_on($f); + my $rf = Cwd::realpath($f); + Encode::_utf8_on($rf); + if ( -d $rf ) { + opendir (SUBDIR, $rf) + or die "Can't open dir `" .$rf. ".:" .$!. "\n"; while (my $d = readdir(SUBDIR)) { next if $d eq File::Spec->curdir(); next if $d eq File::Spec->updir(); @@ -221,7 +234,7 @@ undef @files; my $sth = $dbh->prepare ( "SELECT id FROM $config{prefix}videodata WHERE filename = ?" ) or die "Error: " .$dbh->errstr; foreach (@filelist) { - $sth->execute ($_) or die "Can't select: " .$dbh->errstr. "\n"; + $sth->execute (&iconv($_)) or die "Can't select: " .$dbh->errstr. "\n"; my @res = $sth->fetchrow_array; die $sth->errstr if $sth->err; @@ -249,9 +262,9 @@ exit $r; sub report { my ($str, $bad) = @_; if (@$bad) { - print LOG "* " .$str. ":\n"; + print LOG &iconv("* " .$str. ":\n"); foreach (@$bad) { - print LOG " ", $_, "\n"; + print LOG &iconv(" " .$_. "\n"); } $r = 1; } @@ -262,5 +275,24 @@ sub check { FROM $config{prefix}videodata WHERE " .$_[1] ) or die "Can't select: " .$dbh->errstr. "\n"; + # The database is using UTF-8, but the UTF-8 flag may be missing; + # restore it. + Encode::_utf8_on($_->[0]); &report ("The following files have " .$_[0], [map {$_->[0]} @$bad]); } + +# Convert to UTF-8 +# The input string should be in latin1 unless the UTF-8 flag is on. +sub iconv { + my $string = $_[0]; + return unless defined $string; + + unless (Encode::is_utf8($string)) { + $string = Encode::decode( 'latin1', $string, 1 ); + } + + # The UTF-8 flag should be on now + warn "Warning: Not a valid Unicode string: \"$string\".\n" + unless utf8::valid($string); + return $string; +} diff --git a/videomv.pl b/videomv.pl index dd66375..6cc3a71 100755 --- a/videomv.pl +++ b/videomv.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl -CAL +#!/usr/bin/perl -CADS # This program is free software. It comes without any warranty, to the # extent permitted by applicable law. You can redistribute it and/or @@ -11,6 +11,8 @@ $VERSION = "0.2, 10 August 2011"; use warnings; use strict; +use utf8; +use feature "unicode_strings"; use Getopt::Long qw/:config posix_default no_ignore_case gnu_compat bundling auto_version auto_help/; @@ -54,7 +56,8 @@ I [{I,I}I[I]] =head1 DISCLAIMER -Your collection is assumed to have the following structure: two +Your collection is assumed to be encoded in UTF-8, and to have the following +structure: two folders, I and I, that have the same parent. I contains one subdirectory for each director, and each movie lies (B) in the subdirectory of its director. @@ -351,7 +354,9 @@ GetOptions( "lookup-db"=> \$lookupdb_flag, pod2usage(2) if ($#ARGV < 0 or $#ARGV > 1); *LOG = *STDERR unless defined (fileno LOG); +map { Encode::_utf8_on($_) } @ARGV; my ($old_path,$new_path) = @ARGV; + $new_path = $symlinks unless defined $new_path; die "Error: incompatible options.\n" if @@ -360,21 +365,34 @@ die "Error: incompatible options.\n" if die "Error: `" .$old_path. "' is neither a symlink nor a plain file.\n" unless (-l $old_path or -f $old_path); -$new_path = catfile ($new_path, basename($old_path)) - if -d realpath($new_path); +my $new_rpath = realpath($new_path); +Encode::_utf8_on($new_rpath); +$new_path = catfile ($new_path, basename($old_path)) if -d $new_rpath; die "Error: `" .$new_path. "' exists.\n" if (-l $new_path or -f $new_path); my $old_filename = basename($old_path); my $new_filename = basename($new_path); +map { Encode::_utf8_on($_) } ($old_filename, $new_filename); + +my $old_dirname = dirname($old_path); +my $new_dirname = dirname($new_path); +map { Encode::_utf8_on($_) } ($old_dirname, $new_dirname); + +my $old_rdirname = realpath($old_dirname); +my $new_rdirname = realpath($new_dirname); +map { Encode::_utf8_on($_) } ($old_rdirname, $new_rdirname); # Get the real paths, {..,symlink}-collapse and so on -my $real_old_path = catfile (realpath (dirname($old_path)), $old_filename); -my $real_new_path = catfile (realpath (dirname($new_path)), $new_filename); +my $real_old_path = catfile ($old_rdirname, $old_filename); +my $real_new_path = catfile ($new_rdirname, $new_filename); +map { Encode::_utf8_on($_) } ($real_old_path, $real_new_path); my $real_symlinks = realpath($symlinks); -my @real_symlinks = splitdir($real_symlinks); my $real_directors = realpath($directors); +map { Encode::_utf8_on($_) } ($real_symlinks, $real_directors); + +my @real_symlinks = splitdir($real_symlinks); my @real_directors = splitdir($real_directors); @@ -396,9 +414,17 @@ unless (defined $ignoredb_flag) { $RES = $dbh->selectall_hashref ( "SELECT id,director,title FROM $config{prefix}videodata WHERE filename = ?", - 'id', undef, $old_filename + 'id', undef, &iconv($old_filename) ) or die "Can't select: " .$dbh->errstr. "\n"; + + # The database is using UTF-8, but the UTF-8 flag may be missing; + # restore it. + foreach my $id (keys $RES) { + foreach my $k (keys $RES->{$id}) { + Encode::_utf8_on($RES->{$id}->{$k}); + } + } $nRES = scalar (keys %$RES); # The ID of the first movie found in the database, if any @@ -430,7 +456,7 @@ if (&is_symlink($real_new_path)) { # Create directory if it doesn't exist my $new_dirname = catfile ($directors, $new_director); unless (-d $new_dirname) { - print STDERR "Directory `$new_dirname' does not exist. "; + print STDERR &iconv("Directory `$new_dirname' does not exist. "); until (-d $new_dirname) { print STDERR "Should I create it? (Y/n) "; my $a = lc ; @@ -457,11 +483,13 @@ if (&is_symlink($real_new_path)) { unless -l $real_old_path; $old_target = realpath ($old_path); + Encode::_utf8_on($old_target); die "Error: `" .$old_path. "' is expected to target to `" .catfile(updir(),'DIRECTORS','*',$old_filename). "'.\n" unless (defined $new_director || &is_director(\$new_director, $old_target)); $new_target = catfile ($directors, $new_director, $new_filename); + Encode::_utf8_on($new_target); } elsif (&is_director(\$old_director, $real_old_path)) { @@ -469,10 +497,14 @@ if (&is_symlink($real_new_path)) { $old_symlink = catfile ($symlinks, $old_filename); die "Error: `" .$old_symlink. "' is expected to be a symlink.\n" unless -l $old_symlink; + my $old_rsymlink = realpath($old_symlink); + my $old_rpath = realpath($old_path); + map {Encode::_utf8_on($_)} ($old_rsymlink, $old_rpath); + die "Error: `" .$old_symlink. "' is expected to target to `" .catfile(updir(),'DIRECTORS',$old_director,$old_filename) ."'.\n" - unless realpath($old_symlink) eq realpath($old_path); + unless $old_rsymlink eq $old_rpath; $old_target = $old_path; @@ -506,6 +538,7 @@ elsif (&is_director(\$new_director, $real_new_path)) { $old_symlink = $old_path; $old_target = realpath ($old_path); + Encode::_utf8_on($old_target); } elsif (&is_director(\$old_director, $real_old_path)) { @@ -513,10 +546,13 @@ elsif (&is_director(\$new_director, $real_new_path)) { $old_symlink = catfile ($symlinks, $old_filename); die "Error: `" .$old_symlink. "' is expected to be a symlink.\n" unless -l $old_symlink; + my $old_rsymlink = realpath($old_symlink); + my $old_rpath = realpath($old_path); + map {Encode::_utf8_on($_)} ($old_rsymlink, $old_rpath); die "Error: `" .$old_symlink. "' is expected to target to `" .catfile(updir(),'DIRECTORS',$old_director,$old_filename) ."'.\n" - unless realpath($old_symlink) eq realpath($old_path); + unless $old_rsymlink eq $old_rpath; $old_target = $old_path; } @@ -536,7 +572,10 @@ else { my @actions; # Successful actions -if (dirname (realpath $old_target) eq dirname (realpath $new_target)) { +my $old_rtarget = realpath ($old_target); +my $new_rtarget = realpath ($new_target); +map {Encode::_utf8_on($_)} ($old_rtarget, $new_rtarget); +if (dirname ($old_rtarget) eq dirname ($new_rtarget)) { &perform ("Renaming target... ", rename $old_target, $new_target) unless ($old_filename eq $new_filename); } @@ -547,11 +586,12 @@ else { my $old_tar; if (defined $old_symlink) { $old_tar = readlink $old_symlink; + Encode::_utf8_on($old_tar); &perform ("Deleting old symlink... ", unlink $old_symlink) unless $r; } unless ($r) { - opendir my $DIR, catdir($directors, $old_director) or die "Can't open: $!"; + opendir my $DIR, catdir($directors, $old_director) or die "Can't opendir" .catdir($directors, $old_director).": $!"; if (scalar(grep(!/^\.\.?$/, readdir $DIR) == 0)) { &perform ("Removing empty dir... ", rmdir catdir($directors, $old_director)); } @@ -578,7 +618,7 @@ unless ($r || defined ($ignoredb_flag) || $old_filename eq $new_filename) { } else { my $rv = $dbh->do ( "UPDATE $config{prefix}videodata SET filename = ? WHERE id = ?", - undef, $new_filename, $id + undef, &iconv($new_filename), $id ); &ack (\$r, $rv); } @@ -670,6 +710,7 @@ sub revert { # Check wether a path starts with $movies sub is_symlink { my @path = splitdir ($_[0]); + map { Encode::_utf8_on($_) } @path; my @real_symlinks = @real_symlinks; #local copy while ($#real_symlinks>=0 && $#path>=0 && $real_symlinks[0] eq $path[0]) { @@ -688,6 +729,7 @@ sub is_symlink { # first argument if that's the case sub is_director { my @path = splitdir ($_[1]); + map { Encode::_utf8_on($_) } @path; my @real_directors = @real_directors; #local copy while ($#real_directors>=0 && $#path>=0 && $real_directors[0] eq $path[0]) { @@ -702,3 +744,19 @@ sub is_director { return 0; # That's not a "director" } } + +# Convert to UTF-8 +# The input string should be in latin1 unless the UTF-8 flag is on. +sub iconv { + my $string = $_[0]; + return unless defined $string; + + unless (Encode::is_utf8($string)) { + $string = Encode::decode( 'latin1', $string, 1 ); + } + + # The UTF-8 flag should be on now + warn "Warning: Not a valid Unicode string: \"$string\".\n" + unless utf8::valid($string); + return $string; +} diff --git a/videorm.pl b/videorm.pl index bbdc5c2..3f58d4d 100755 --- a/videorm.pl +++ b/videorm.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl -CAL +#!/usr/bin/perl -CADS # This program is free software. It comes without any warranty, to the # extent permitted by applicable law. You can redistribute it and/or @@ -11,6 +11,8 @@ $VERSION = "0.1, 09 January 2012"; use warnings; use strict; +use utf8; +use feature "unicode_strings"; use Getopt::Long qw/:config posix_default no_ignore_case gnu_compat bundling auto_version auto_help/; @@ -19,7 +21,6 @@ use DBI; use File::Basename; use File::Spec::Functions qw /catfile catdir splitdir updir/; use File::Copy; -use Cwd qw /realpath/; use Env qw /HOME/; ################################################################################ @@ -53,7 +54,8 @@ B [B<-q>] [B<-f>] I =head1 DISCLAIMER -Your collection is assumed to have the following structure: two +Your collection is assumed to be encoded in UTF-8, and to have the following +structure: two folders, I and I, that have the same parent. I contains one subdirectory for each director, and each movie lies (B) in the subdirectory of its director. @@ -155,9 +157,11 @@ my $file = basename ($ARGV[0]); my $file_s = catfile ($symlinks, $file); my $file_d; $file_d = catfile ($symlinks, readlink $file_s) if -l $file_s; +Encode::_utf8_on($file_d); my $director; if (-l $file_s) { my @director = File::Spec->splitdir(readlink $file_s); + map {Encode::_utf8_on($_)} @director; $director = $director[2] if $#director >= 2; } @@ -176,7 +180,7 @@ $dbh->do( "SET NAMES UTF8" ) or die "Error: Can't set names to UTF-8.\n"; # Lookup for the file in the videodb database my $res = $dbh->selectall_arrayref ( "SELECT id FROM $config{prefix}videodata WHERE filename = ?", - undef, $file ) + undef, &iconv($file) ) or die "Can't select: " .$dbh->errstr. "\n"; die "Error: Your collection is not sane! (and `--force' is not set).\n" @@ -229,3 +233,19 @@ sub ack { $r = 1; } } + +# Convert to UTF-8 +# The input string should be in latin1 unless the UTF-8 flag is on. +sub iconv { + my $string = $_[0]; + return unless defined $string; + + unless (Encode::is_utf8($string)) { + $string = Encode::decode( 'latin1', $string, 1 ); + } + + # The UTF-8 flag should be on now + warn "Warning: Not a valid Unicode string: \"$string\".\n" + unless utf8::valid($string); + return $string; +} -- cgit v1.2.3