Commit 270539fa authored by sikeda's avatar sikeda
Browse files

[dev] New functions decode_filesystem_safe() and encode_filesystem_safe() in...

[dev] New functions decode_filesystem_safe() and encode_filesystem_safe() in Sympa::Tools::Text.  Since they give more consistent result and are safer, they may be used instead of escape_chars(), qencode_filename() and so on in the future.


git-svn-id: https://subversion.renater.fr/sympa/branches/sympa-6.2-branch@12433 05aa8bb8-cd2b-0410-b1d7-8918dfa770ce
parent 778879b6
......@@ -26,6 +26,7 @@ package Sympa::Tools::Text;
use strict;
use warnings;
use Encode qw();
use Text::LineFold;
use if (5.008 < $] && $] < 5.016), qw(Unicode::CaseFold fc);
use if (5.016 <= $]), qw(feature fc);
......@@ -64,6 +65,25 @@ sub wrap_text {
return $text;
}
# Reverses encode_filesystem_safe(): turns every "_XX" escape (an underscore
# followed by two hexadecimal digits) back into the byte it stands for.
#
# Parameter:
#   $str - string to be decoded.
# Returns:
#   The decoded string as octets (never carrying the utf8 flag), or the
#   empty string when $str is undefined or empty.
sub decode_filesystem_safe {
    my ($str) = @_;

    # Nothing to decode.
    return '' unless defined($str) && length($str);

    # Work on UTF-8 octets so that the result never carries the utf8 flag.
    if (Encode::is_utf8($str)) {
        $str = Encode::encode_utf8($str);
    }

    # Both "_XX" and "_xx" are accepted: a case-insensitive filesystem may
    # have altered the case of the hexadecimal digits.
    $str =~ s{_([0-9A-Fa-f]{2})}{chr(hex("0x$1"))}eg;

    return $str;
}
# Encodes a string so that it may be used as a name on the filesystem.
# Every byte other than '-', '+', '.', '@' and the ASCII letters and digits
# is replaced by '_' followed by two lowercase hexadecimal digits; this
# includes '/' and '_' itself.
#
# Parameter:
#   $str - string to be encoded.
# Returns:
#   The encoded string as octets (never carrying the utf8 flag), or the
#   empty string when $str is undefined or empty.
sub encode_filesystem_safe {
    my ($str) = @_;

    # Nothing to encode.
    return '' unless defined($str) && length($str);

    # Character strings are escaped by their UTF-8 octets.
    if (Encode::is_utf8($str)) {
        $str = Encode::encode_utf8($str);
    }

    $str =~ s{([^-+.0-9\@A-Za-z])}{sprintf('_%02x', ord($1))}eg;

    return $str;
}
sub foldcase {
my $str = shift;
return '' unless defined $str and length $str;
......@@ -134,6 +154,49 @@ Default is C<78>.
=back
=item decode_filesystem_safe ( $str )
I<Function>.
Decodes a string encoded by encode_filesystem_safe().
Parameter:
=over
=item $str
String to be decoded.
=back
Returns:
Decoded string, with the C<utf8> flag stripped if it was set.
=item encode_filesystem_safe ( $str )
I<Function>.
Encodes a string $str to be suitable for filesystem.
Parameter:
=over
=item $str
String to be encoded.
=back
Returns:
Encoded string, with the C<utf8> flag stripped if it was set.
All bytes except C<'-'>, C<'+'>, C<'.'>, C<'@'>
and ASCII alphanumeric characters are encoded as C<'_'> followed by
two lowercase hexadecimal digits.
Note that C<'/'> will also be encoded.
=item foldcase ( $str )
I<Function>.
......@@ -159,4 +222,11 @@ A string.
=back
=head1 HISTORY
L<Sympa::Tools::Text> appeared on Sympa 6.2a.41.
decode_filesystem_safe() and encode_filesystem_safe() were added
on Sympa 6.2.10.
=cut
......@@ -167,7 +167,8 @@ sub load_edit_list_conf {
#sub escape_regexp ($s);
# Escape weird characters
# FIXME: Should not use.
# ToDo: This should be obsoleted: Would be better to use
# Sympa::Tools::Text::encode_filesystem_safe().
sub escape_chars {
my $s = shift;
my $except = shift; ## Exceptions
......@@ -197,6 +198,8 @@ sub escape_chars {
## Escape shared document file name
## Q-decode it first
# ToDo: This should be obsoleted: Would be better to use
# Sympa::Tools::Text::encode_filesystem_safe().
sub escape_docname {
my $filename = shift;
my $except = shift; ## Exceptions
......@@ -223,6 +226,8 @@ sub unicode_to_utf8 {
}
## Q-Encode web file name
# ToDo: This should be obsoleted: Would be better to use
# Sympa::Tools::Text::encode_filesystem_safe().
sub qencode_filename {
my $filename = shift;
......@@ -255,6 +260,8 @@ sub qencode_filename {
}
## Q-Decode web file name
# ToDo: This should be obsoleted: Would be better to use
# Sympa::Tools::Text::decode_filesystem_safe().
sub qdecode_filename {
my $filename = shift;
......@@ -269,6 +276,8 @@ sub qdecode_filename {
}
## Unescape weird characters
# ToDo: This should be obsoleted: Would be better to use
# Sympa::Tools::Text::decode_filesystem_safe().
sub unescape_chars {
my $s = shift;
......@@ -384,6 +393,7 @@ sub escape_quote {
## Q-encode a complete file hierarchy
## Useful to Q-encode subshared documents
# ToDo: See a comment on tools::qencode_filename().
sub qencode_hierarchy {
my $dir = shift; ## Root directory
my $original_encoding = shift; ## Suspected original encoding of filenames
......
......@@ -4,20 +4,46 @@
use strict;
use warnings;
use Encode qw();
use Test::More;

use Sympa::Tools::Text;

# 3 canonic_email + 4 encode_filesystem_safe + 5 decode_filesystem_safe.
# Planned exactly once: a second plan() call is fatal in Test::More.
plan tests => 12;

# ASCII sample.  It contains '_' so that the escape character itself is
# exercised by the encode/decode tests (expected as "_5f").
my $email = q{&'+-./09=_A@Z.a-z};
# Non-ASCII sample, given as a character string.
my $unicode_email =
    qq{\x{60c5}\x{5831}\@\x{30c9}\x{30e1}\x{30a4}\x{30f3}\x{540d}\x{4f8b}.jp};

## canonic_email()

is Sympa::Tools::Text::canonic_email($email),
    q{&'+-./09=_a@z.a-z}, 'canonic_email';
is Sympa::Tools::Text::canonic_email("\t\r\n "), undef,
    'canonic_email, whitespaces';
is Sympa::Tools::Text::canonic_email(undef), undef,
    'canonic_email, undefined value';

## encode_filesystem_safe()

is Sympa::Tools::Text::encode_filesystem_safe($email),
    q{_26_27+-._2f09_3d_5fA@Z.a-z}, 'encode_filesystem_safe';
is Sympa::Tools::Text::encode_filesystem_safe(undef), '',
    'encode_filesystem_safe, undefined value';
my $enc = Sympa::Tools::Text::encode_filesystem_safe($unicode_email);
is $enc,
    q{_e6_83_85_e5_a0_b1@_e3_83_89_e3_83_a1_e3_82_a4_e3_83_b3_e5_90_8d_e4_be_8b.jp},
    'encode_filesystem_safe, Unicode';
ok !Encode::is_utf8($enc), 'encode_filesystem_safe, utf8 flag';

## decode_filesystem_safe()

is Sympa::Tools::Text::decode_filesystem_safe(q{_26_27+-._2f09_3d_5fA@Z.a-z}),
    $email, 'decode_filesystem_safe';
# Uppercase hexadecimal digits must be accepted as well.
is Sympa::Tools::Text::decode_filesystem_safe(q{_26_27+-._2F09_3D_5FA@Z.a-z}),
    $email, 'decode_filesystem_safe, uppercase';
is Sympa::Tools::Text::decode_filesystem_safe(undef), '',
    'decode_filesystem_safe, undefined value';
my $dec = Sympa::Tools::Text::decode_filesystem_safe(
    q{_e6_83_85_e5_a0_b1@_e3_83_89_e3_83_a1_e3_82_a4_e3_83_b3_e5_90_8d_e4_be_8b.jp}
);
ok !Encode::is_utf8($dec), 'decode_filesystem_safe, utf8 flag';
# The decoder returns octets; flag them as UTF-8 characters so that they can
# be compared with the character string $unicode_email.
Encode::_utf8_on($dec);
is $dec, $unicode_email, 'decode_filesystem_safe, Unicode';

# ToDo: foldcase()
# ToDo: wrap_text()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment