summaryrefslogtreecommitdiffstats
path: root/videodb-check.pl
diff options
context:
space:
mode:
authorGuilhem Moulin <guilhem.moulin@ens-lyon.org>2012-03-07 01:10:19 +0100
committerGuilhem Moulin <guilhem.moulin@ens-lyon.org>2012-03-07 01:12:06 +0100
commit7944c09286c112f5d2880c4c4d3fdf9a4a86aa1a (patch)
tree2c596e7d4f519201377ea291472425dd34b2f901 /videodb-check.pl
parentdfd75162377462580e9a5fff833f3ab309157524 (diff)
unicode, re
Diffstat (limited to 'videodb-check.pl')
-rwxr-xr-xvideodb-check.pl48
1 files changed, 40 insertions, 8 deletions
diff --git a/videodb-check.pl b/videodb-check.pl
index dae4cdb..b9fb4ca 100755
--- a/videodb-check.pl
+++ b/videodb-check.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl -CAL
+#!/usr/bin/perl -CADS
# This program is free software. It comes without any warranty, to the
# extent permitted by applicable law. You can redistribute it and/or
@@ -10,6 +10,8 @@ $VERSION = "0.1, 27 November 2011";
use warnings;
use strict;
+use utf8;
+use feature "unicode_strings";
use DBI;
use Pod::Usage;
@@ -44,7 +46,8 @@ B<videodb-check.pl>
=head1 DISCLAIMER
-Your collection is assumed to have the following structure: two
+Your collection is assumed to be encoded in UTF-8, and to have the following
+structure: two
folders, I<DIRECTORS> and I<MOVIES>, that have the same parent.
I<DIRECTORS> contains one subdirectory for each director, and each movie
lies (B<as a regular file>) in the subdirectory of its director.
@@ -161,6 +164,11 @@ my @links; # List of missing symlinks
my @files; # List of symlinks that have a non existing/valid target
my @dirs; # List of symlinks that target to a wrong director
foreach (@$res) {
+ # The database is using UTF-8, but the UTF-8 flag may be missing;
+ # restore it.
+ Encode::_utf8_on($_->[0]);
+ Encode::_utf8_on($_->[1]);
+
my @ls = File::Spec->splitdir($_->[0]);
my $l = File::Spec->catfile ($symlinks, $ls[0]);
@@ -170,6 +178,7 @@ foreach (@$res) {
}
my @target = File::Spec->splitdir (readlink $l);
+ map {Encode::_utf8_on($_)} @target;
push @files, $_->[0]
unless ( $target[0] eq File::Spec->updir()
and $target[1] eq "DIRECTORS"
@@ -199,10 +208,14 @@ while (my $l = readdir(DIR)) {
next if $l eq File::Spec->curdir();
next if $l eq File::Spec->updir();
+ Encode::_utf8_on($l);
my $f = File::Spec->catfile($symlinks, $l);
- if ( -d Cwd::realpath($f) ) {
- opendir (SUBDIR, Cwd::realpath($f))
- or die "Can't open dir `" .Cwd::realpath($f). ".:" .$!. "\n";
+ Encode::_utf8_on($f);
+ my $rf = Cwd::realpath($f);
+ Encode::_utf8_on($rf);
+ if ( -d $rf ) {
+ opendir (SUBDIR, $rf)
+ or die "Can't open dir `" .$rf. ".:" .$!. "\n";
while (my $d = readdir(SUBDIR)) {
next if $d eq File::Spec->curdir();
next if $d eq File::Spec->updir();
@@ -221,7 +234,7 @@ undef @files;
my $sth = $dbh->prepare ( "SELECT id FROM $config{prefix}videodata WHERE filename = ?" )
or die "Error: " .$dbh->errstr;
foreach (@filelist) {
- $sth->execute ($_) or die "Can't select: " .$dbh->errstr. "\n";
+ $sth->execute (&iconv($_)) or die "Can't select: " .$dbh->errstr. "\n";
my @res = $sth->fetchrow_array;
die $sth->errstr if $sth->err;
@@ -249,9 +262,9 @@ exit $r;
sub report {
my ($str, $bad) = @_;
if (@$bad) {
- print LOG "* " .$str. ":\n";
+ print LOG &iconv("* " .$str. ":\n");
foreach (@$bad) {
- print LOG " ", $_, "\n";
+ print LOG &iconv(" " .$_. "\n");
}
$r = 1;
}
@@ -262,5 +275,24 @@ sub check {
FROM $config{prefix}videodata
WHERE " .$_[1] )
or die "Can't select: " .$dbh->errstr. "\n";
+ # The database is using UTF-8, but the UTF-8 flag may be missing;
+ # restore it.
+ Encode::_utf8_on($_->[0]);
&report ("The following files have " .$_[0], [map {$_->[0]} @$bad]);
}
+
+# Convert to UTF-8
+# The input string should be in latin1 unless the UTF-8 flag is on.
+sub iconv {
+ my $string = $_[0];
+ return unless defined $string;
+
+ unless (Encode::is_utf8($string)) {
+ $string = Encode::decode( 'latin1', $string, 1 );
+ }
+
+ # The UTF-8 flag should be on now
+ warn "Warning: Not a valid Unicode string: \"$string\".\n"
+ unless utf8::valid($string);
+ return $string;
+}