From 7944c09286c112f5d2880c4c4d3fdf9a4a86aa1a Mon Sep 17 00:00:00 2001 From: Guilhem Moulin Date: Wed, 7 Mar 2012 01:10:19 +0100 Subject: unicode, re --- videorm.pl | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'videorm.pl') diff --git a/videorm.pl b/videorm.pl index bbdc5c2..3f58d4d 100755 --- a/videorm.pl +++ b/videorm.pl @@ -1,4 +1,4 @@ -#!/usr/bin/perl -CAL +#!/usr/bin/perl -CADS # This program is free software. It comes without any warranty, to the # extent permitted by applicable law. You can redistribute it and/or @@ -11,6 +11,8 @@ $VERSION = "0.1, 09 January 2012"; use warnings; use strict; +use utf8; +use feature "unicode_strings"; use Getopt::Long qw/:config posix_default no_ignore_case gnu_compat bundling auto_version auto_help/; @@ -19,7 +21,6 @@ use DBI; use File::Basename; use File::Spec::Functions qw /catfile catdir splitdir updir/; use File::Copy; -use Cwd qw /realpath/; use Env qw /HOME/; ################################################################################ @@ -53,7 +54,8 @@ B [B<-q>] [B<-f>] I =head1 DISCLAIMER -Your collection is assumed to have the following structure: two +Your collection is assumed to be encoded in UTF-8, and to have the following +structure: two folders, I and I, that have the same parent. I contains one subdirectory for each director, and each movie lies (B) in the subdirectory of its director. 
@@ -155,9 +157,11 @@ my $file = basename ($ARGV[0]); my $file_s = catfile ($symlinks, $file); my $file_d; $file_d = catfile ($symlinks, readlink $file_s) if -l $file_s; +Encode::_utf8_on($file_d) if defined $file_d; my $director; if (-l $file_s) { my @director = File::Spec->splitdir(readlink $file_s); + Encode::_utf8_on($_) for @director; $director = $director[2] if $#director >= 2; } @@ -176,7 +180,7 @@ $dbh->do( "SET NAMES UTF8" ) or die "Error: Can't set names to UTF-8.\n"; # Lookup for the file in the videodb database my $res = $dbh->selectall_arrayref ( "SELECT id FROM $config{prefix}videodata WHERE filename = ?", - undef, $file ) + undef, iconv($file) ) or die "Can't select: " .$dbh->errstr. "\n"; die "Error: Your collection is not sane! (and `--force' is not set).\n" @@ -229,3 +233,19 @@ sub ack { $r = 1; } } + +# Return the argument as a Perl character string (UTF-8 flag on). +# The input string should be in latin1 unless the UTF-8 flag is on. +sub iconv { + my ($string) = @_; + return unless defined $string; + + unless (Encode::is_utf8($string)) { + $string = Encode::decode( 'latin1', $string, 1 ); + } + + # The UTF-8 flag should be on now + warn "Warning: Not a valid Unicode string: \"$string\".\n" + unless utf8::valid($string); + return $string; +} -- cgit v1.2.3