Unverified Commit a1760977 authored by IKEDA Soji's avatar IKEDA Soji Committed by GitHub
Browse files

Merge pull request #840 from dverdin/urlize2 by dverdin

Urlize mode bug fixes
parents 1fa6e702 c3c27da1
......@@ -30,6 +30,7 @@ install:
- cpan-install --coverage
- cpanm --installdeps --notest --with-develop --with-feature=soap --with-feature=ldap .
- cpanm --notest --quiet Unicode::CaseFold
- cpanm --notest --quiet DBD::SQLite
before_script:
- coverage-setup
......@@ -40,6 +41,7 @@ script:
- cd src; make; cd ..
- make check-local TEST_FILES='xt/perltidy.t' || true
- make check-local TEST_FILES='t/compile_executables.t t/compile_modules.t t/Language.t t/compile_scenarios.t t/parse_templates.t t/pod-syntax.t'
- make check-local TEST_FILES='t/Message.t'
after_success:
- coverage-report
......
......@@ -2091,7 +2091,7 @@ sub _urlize_parts {
## Only multipart/mixed messages are modified.
my $eff_type = $entity->effective_type || 'text/plain';
unless ($eff_type eq 'multipart/mixed') {
unless ($eff_type eq 'multipart/mixed' or $eff_type eq 'multipart/alternative' or $eff_type eq 'multipart/related') {
return undef;
}
......@@ -2101,39 +2101,59 @@ sub _urlize_parts {
return undef;
}
## Clean up Message-ID
my $dir1 = Sympa::Tools::Text::escape_chars($message_id);
## Clean up Message-ID and preventing double percent encoding.
my $dir1 = Sympa::Tools::Text::encode_filesystem_safe($message_id);
#XXX$dir1 = '/' . $dir1;
unless (mkdir "$expl/$dir1", 0775) {
$log->syslog('err', 'Unable to create urlized directory %s/%s',
$expl, $dir1);
return 0;
}
return _urlize_sub_parts($entity, $list, $message_id, $dir1, 0);
}
sub _urlize_sub_parts {
my $entity = shift;
my $list = shift;
my $message_id = shift;
my $directory = shift;
my $i = shift;
my @parts = ();
my $i = 0;
use Data::Dumper;
my $parent_eff_type = $entity->effective_type();
foreach my $part ($entity->parts) {
my $p = _urlize_one_part($part->dup, $list, $dir1, $i);
if (defined $p) {
my $eff_type = $part->effective_type || 'text/plain';
if ($eff_type eq 'multipart/mixed') {
$i++;
my $p = _urlize_sub_parts($part->dup, $list, $message_id, $directory, $i);
push @parts, $p;
} elsif (($eff_type eq 'multipart/alternative' or $eff_type eq 'multipart/related') and $i < 2) {
$i++;
my $p = _urlize_sub_parts($part->dup, $list, $message_id, $directory, $i);
push @parts, $p;
} else {
push @parts, $part;
my $p = _urlize_one_part($part->dup, $list, $directory, $i, $parent_eff_type);
if (defined $p) {
push @parts, $p;
$i++;
} else {
push @parts, $part;
}
}
}
if ($i) {
## Replace message parts
$entity->parts(\@parts);
}
$entity->parts(\@parts);
return $entity;
}
sub _urlize_one_part {
my $entity = shift;
my $list = shift;
my $dir = shift;
my $i = shift;
my $entity = shift;
my $list = shift;
my $dir = shift;
my $i = shift;
my $parent_eff_type = shift;
return undef unless ($parent_eff_type eq 'multipart/mixed');
my $expl = $list->{'dir'} . '/urlized';
my $listname = $list->{'name'};
......@@ -2148,6 +2168,15 @@ sub _urlize_one_part {
$filename = Encode::encode_utf8($filename)
if Encode::is_utf8($filename);
} else {
my $content_disposition = lc($entity->head->mime_attr('Content-Disposition') // '');
if ($entity->effective_type =~ m{\Atext}
&& (!$content_disposition
|| $content_disposition eq 'attachment'
)
&& $entity->head->mime_attr('content-type.charset')
){
return undef;
}
my $fileExt = Conf::get_mime_type($entity->effective_type || '')
|| 'bin';
$filename = sprintf 'msg.%d.%s', $i, $fileExt;
......
#!/usr/bin/perl
# -*- indent-tabs-mode: nil; -*-
# vim:ft=perl:et:sw=4
# $Id: tools_data.t 8606 2013-02-06 08:44:02Z rousse $
use strict;
use warnings;
use Data::Dumper;
use English;
use File::Path qw(make_path rmtree);
use File::Copy::Recursive qw(fcopy rcopy dircopy fmove rmove dirmove);
use FindBin qw($Bin);
use lib "$Bin/../src/lib";
use lib 't/stub';
use Test::More;
BEGIN {
use_ok('Sympa::Message');
}
# ---------------------------------------------------------------------------
# Test environment setup.
#
# Builds a throw-away Sympa environment under t/tmp: a scratch database
# directory, a copy of the list fixtures from t/data/list_data/, and an
# empty "etc" directory.  %Conf::Conf is then pointed at these locations so
# that the code under test never touches a real installation.
#
# Any failure while preparing the environment aborts the script with a
# message that includes the operating-system error, because none of the
# assertions below are meaningful without a working environment.
# ---------------------------------------------------------------------------
my $tmp_dir        = 't/tmp';
my $db_dir         = $tmp_dir . '/db';
my $home_dir       = $tmp_dir . '/list_data';
my $etc_dir        = $tmp_dir . '/etc';
my $test_list_name = 'test';

%Conf::Conf = (
    domain                => 'lists.example.com',    # mandatory
    listmaster            => 'dude@example.com',     # mandatory
    lang                  => 'en-US',
    sender_headers        => 'From',
    tmpdir                => $tmp_dir,
    db_type               => 'SQLite',
    db_name               => $db_dir . '/message-test-db.sqlite',
    update_db_field_types => 'auto',
    home                  => $home_dir,
    etc                   => $etc_dir,
    cache_list_config     => '',
    supported_lang        => 'en-US',
    filesystem_encoding   => 'utf-8',
    urlize_min_size       => 0,
);

# Always start from an empty scratch tree so that leftovers from an earlier
# (possibly aborted) run cannot influence the results.
if (-d $tmp_dir) {
    rmtree($tmp_dir);
}
make_path($tmp_dir);
make_path($db_dir);
make_path($home_dir);
dircopy('t/data/list_data/', $home_dir)
    or die "Can't copy 't/data/list_data/' to '$home_dir': $ERRNO\n";
make_path($etc_dir);

# Only messages logged at level 'err' are echoed to STDERR.
my $log = Sympa::Log->instance;
$log->{log_to_stderr} = 'err';

# Create an empty database file for the database manager to probe.
if (-f $Conf::Conf{db_name}) {
    unlink $Conf::Conf{db_name};
}
open my $fileHandle, '>', $Conf::Conf{db_name}
    or die "Can't create '$Conf::Conf{db_name}': $ERRNO\n";
close $fileHandle
    or die "Can't close '$Conf::Conf{db_name}': $ERRNO\n";

my $sdm = Sympa::DatabaseManager->instance;
Sympa::DatabaseManager::probe_db();

# The list whose "urlized" directory receives the detached attachments.
my $list = Sympa::List->new($test_list_name, '*')
    or die "Can't load test list '$test_list_name' from '$home_dir'\n";
$list->_update_list_db;

# Prefix that every URL to a detached attachment is expected to start with.
my $root_url = '/attach/test/';
# Table of urlize scenarios, one hash per sample message.
#
#   test_case       - human readable label used in the test descriptions.
#   filename        - path of the sample message to load.
#   dirname         - directory name expected under the list's "urlized"
#                     directory.
#   escaped_dirname - the directory name as it is searched for inside the
#                     URLs of the rewritten message.
#   attachments     - one entry per attachment expected to be detached:
#                       name         - file name looked up in the urlized
#                                      directory;
#                       escaped_name - the form of the name searched for in
#                                      the URL of the rewritten message.
my @to_urlize = (
    {   test_case       => 'simple',
        filename        => 't/samples/urlize-simple.eml',
        dirname         => 'simple@example.com',
        escaped_dirname => 'simple%40example.com',
        attachments     => [
            { name => 'attachment.pdf', escaped_name => 'attachment.pdf' },
        ],
    },
    {   test_case       => 'simple with several attachments',
        filename        => 't/samples/urlize-simple-mutiple-attachments.eml',
        dirname         => 'simple@example.com',
        escaped_dirname => 'simple%40example.com',
        attachments     => [
            { name => 'attachment.pdf', escaped_name => 'attachment.pdf' },
            { name => 'text.txt',       escaped_name => 'text.txt' },
            { name => 'image.png',      escaped_name => 'image.png' },
        ],
    },
    {   test_case       => 'encoding',
        filename        => 't/samples/urlize-encoding.eml',
        dirname         => 'globuz_24_3c_3e_25@example.com',
        escaped_dirname => 'globuz_24_3c_3e_25%40example.com',
        attachments     => [
            {   name         => 'ございます.pdf',
                escaped_name => '%25e3%2581%2594%25e3%2581%259',
            },
        ],
    },
    {   test_case       => 'nested in multipart/mixed message',
        filename        => 't/samples/urlize-nested-mixed.eml',
        dirname         => '3_24@domain.tld',
        escaped_dirname => '3_24%40domain.tld',
        attachments     => [
            {   name         => 'Würzburg.txt',
                escaped_name => 'W%25c3%25bcrzburg.txt',
            },
        ],
    },
    {   test_case       => 'nested in multipart/alternative message',
        filename        => 't/samples/urlize-nested-alternative.eml',
        dirname         => '4_24@domain.tld',
        escaped_dirname => '4_24%40domain.tld',
        attachments     => [
            { name => 'globuz.pdf', escaped_name => 'globuz.pdf' },
        ],
    },
    {   test_case       => 'Deep nested message',
        filename        => 't/samples/urlize-deep-nested-mixed.eml',
        dirname         => 'deep-nested@domain.tld',
        escaped_dirname => 'deep-nested%40domain.tld',
        attachments     => [
            {   name         => 'Würzburg.txt',
                escaped_name => 'W%25c3%25bcrzburg.txt',
            },
            { name => 'msg.3.bin', escaped_name => 'msg.3.bin' },
        ],
    },
    {   test_case       => 'Related/alternative nested message',
        filename        => 't/samples/urlize-nested-alternative-and-related.eml',
        dirname         => 'alt-nested@domain.tld',
        escaped_dirname => 'alt-nested%40domain.tld',
        attachments     => [
            { name => 'document.pdf', escaped_name => 'document.pdf' },
        ],
    },
);
# ---------------------------------------------------------------------------
# Run every urlize scenario.
#
# For each entry of @to_urlize the sample message is loaded, parsed with
# MIME::Parser and handed to Sympa::Message::_urlize_parts().  The checks
# then verify that:
#   * the directory created under "urlized" has the expected name;
#   * the message text itself was not detached as msg.0.bin / msg.0.txt;
#   * every expected attachment exists on disk and the rewritten message
#     contains a URL pointing at it;
#   * no unexpected file was written to the urlized directory.
# The urlized directory of the scenario is removed afterwards so that the
# next scenario starts with an empty "urlized" root.
# ---------------------------------------------------------------------------
foreach my $test_file (@to_urlize) {
    my $label        = 'Test case: ' . $test_file->{test_case};
    my $urlized_root = "$home_dir/$test_list_name/urlized";

    # Load the sample message.  Fail loudly when the sample is missing
    # rather than feeding an undefined handle to the reader.
    my $to_urlize_file = $test_file->{filename};
    my $lock_fh = Sympa::LockedFile->new($to_urlize_file, -1, '+<')
        or die "Can't open sample message '$to_urlize_file': $ERRNO\n";
    my $to_urlize_string = do { local $RS; <$lock_fh> };
    my $to_urlize = Sympa::Message->new($to_urlize_string);

    my $parser = MIME::Parser->new;
    $parser->extract_nested_messages(0);
    $parser->extract_uuencode(1);
    $parser->output_to_core(1);
    $parser->tmp_dir($Conf::Conf{'tmpdir'});

    # Drop a leading Return-Path field before re-parsing the message.
    my $msg_string = $to_urlize->as_string;
    $msg_string =~ s/\AReturn-Path: (.*?)\n(?![ \t])//s;
    my $entity = $parser->parse_data($msg_string);

    my $new_entity = Sympa::Message::_urlize_parts($entity, $list,
        $to_urlize->{'message_id'});

    # _urlize_parts() may return a non-object (undef or 0).  Treat that as an
    # empty message so the scenario is reported as failing instead of the
    # whole script dying on a method call.
    my $new_message_text = ref $new_entity ? $new_entity->as_string() : '';

    ### Preparation done. Actual testing starts here.

    # The first non-dot entry of the urlized root is the directory created
    # for this message.
    my $urlized_directory;
    if (opendir my $dh, "$urlized_root/") {
        foreach my $file (readdir $dh) {
            next if $file =~ m{\A\.+\Z};
            $urlized_directory = $file;
            last;
        }
        closedir $dh;
    } else {
        diag("Can't read directory '$urlized_root/': $ERRNO");
    }
    my $urlized_path =
        "$urlized_root/" . (defined $urlized_directory ? $urlized_directory : '');

    is($urlized_directory, $test_file->{dirname},
        "$label - Directory where urlized parts are stored correctly escaped.");
    ok(!-f "$urlized_path/msg.0.bin",
        "$label - The text of the message has not been converted to binary attachment.");
    ok(!-f "$urlized_path/msg.0.txt",
        "$label - The text of the message has not been converted to text attachment.");

    my @expected_files;
    foreach my $file (@{ $test_file->{attachments} }) {
        my $is_stored = -f "$urlized_path/$file->{name}";
        ok($is_stored,
            "$label - The attachment $file->{name} has been stored on the filesystem.");
        push @expected_files, $file->{name} if $is_stored;

        # Literal substring search: the expected URL contains characters
        # such as "." that must not be interpreted as regex metacharacters.
        # The URL holds no newline, so searching the whole text is the same
        # as searching it line by line.
        my $expected_url =
              $root_url
            . $test_file->{escaped_dirname} . '/'
            . $file->{escaped_name};
        my $found_url_to_attachment =
            index($new_message_text, $expected_url) >= 0 ? 1 : 0;
        is($found_url_to_attachment, 1,
            "$label - The attachment $file->{name} stored on the filesystem has an URL to retrieve it in the new message.");
    }

    # Everything found in the urlized directory, dot entries excluded.
    my @found_files;
    if (defined $urlized_directory) {
        if (opendir my $dh2, "$urlized_path/") {
            @found_files = grep { $_ !~ m{\A\.+\Z} } readdir $dh2;
            closedir $dh2;
        } else {
            diag("Can't read directory '$urlized_path/': $ERRNO");
        }
    }

    my $total_expected_files = scalar @expected_files;
    is(scalar @found_files, $total_expected_files,
        "$label - Found the urlized attachments (total: $total_expected_files) and only them.");

    # Remove only the directory of this scenario.  Without the guard an
    # undefined directory name would delete the whole "urlized" root and
    # break every following scenario.
    rmtree($urlized_path) if defined $urlized_directory;
}

rmtree $tmp_dir;
done_testing();
merge_feature off
msg_topic_tagging optional
topics Computing
invite public
priority 0
review private
shared_doc
d_read owner
d_edit private
quota 150000000
available_user_options
reception digest,digestplain,html,mail,nomail,not_me,notice,summary,txt,urlize
owner
visibility noconceal
email owner@example.com
profile privileged
reception mail
unsubscribe owner
custom_subject test
archive
web_access private
mail_access owner
period month
reply_to_header
value sender
apply respect
status open
subject A test list
footer_type append
info conceal
subscribe owner
add auth
serial 230
send editorkeyonly
tracking
tracking default
delivery_status_notification on
retention_period 90
message_disposition_notification on
creation
date_epoch 1288885499
email owner@example.com
date 01 apr 1997 at 16:44:59
remind listmaster
process_archive on
del auth
rfc2369_header_fields archive,help,owner,post,unsubscribe
visibility conceal
update
date_epoch 1573046671
email owner@example.com
digest 0,1,2,3,4,5,6 2:50
max_size 12000000
\ No newline at end of file
Return-Path: sender@domain.tld
From: Ye Olde Sender <sender@domain.tld>
Subject: =?UTF-8?Q?Message_=c3=a0_urlizer?=
To: my list <test@lists.example.com>
Message-ID: <deep-nested@domain.tld>
Date: Thu, 19 Dec 2019 11:03:19 +0100
MIME-Version: 1.0
Content-type: multipart/mixed; boundary="________boundary-level1"
This is a multi-part attachment message with deeply nested multipart sub-parts.
--________boundary-level1
Content-Type: multipart/alternative;
boundary="________boundary-level2"
--________boundary-level2
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain;
charset=utf-8
A simple text part. It is alternative with a multipart/mixed part.
--________boundary-level2
Content-Type: multipart/mixed;
boundary="________boundary-level3"
--________boundary-level3
Content-Transfer-Encoding: quoted-printable
Content-Type: text/html;
charset=utf-8
<html>
<head></head>
<body>A simple HTML part. It is alongside with a text attachment and a
multipart/alternative that should both be urlized.</body>
</html>
--________boundary-level3
Content-Disposition: attachment;
filename="=?UTF-8?Q?W=c3=bcrzburg=2etxt?="
Content-Type: text/plain;
name="=?UTF-8?Q?W=c3=bcrzburg=2etxt?="
charset=us-ascii
Content-Transfer-Encoding: 8bit
This is a text file attached to the message. It should be urlized.
--________boundary-level3
Content-Type: multipart/alternative;
boundary="________boundary-level4"
--________boundary-level4
Content-Type: multipart/alternative;
boundary="________boundary-level5"
--________boundary-level5
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain;
charset=utf-8
A simple text part. The whole multipart/alternative part should be urlized.
--________boundary-level5
Content-Type: multipart/mixed;
boundary="________boundary-level6"
--________boundary-level6
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain;
charset=utf-8
A simple text part.
--________boundary-level6
Content-Type: application/png;
name="image.png"
Content-Transfer-Encoding: base64
Content-Disposition: attachment;
filename="image.png"
JVBERi0xLjQNJeLjz9MNCjUgMCBvYmoNPDwvTGluZWFyaXplZCAxL0wgMTIzNzgxNi9PIDcv
--________boundary-level6--
--________boundary-level5--
--________boundary-level4--
--________boundary-level3--
--________boundary-level2--
--________boundary-level1--
From: Ye Olde Sender <sender@domain.tld>
Subject: =?UTF-8?Q?Message_=c3=a0_urlizer?=
To: my list <test@lists.example.com>
Message-ID: <globuz$<>%@example.com>
Date: Thu, 19 Dec 2019 11:03:19 +0100
MIME-Version: 1.0
Content-Type: multipart/mixed;
boundary="________boundary-level1"
Content-Language: en-US
This is a multi-part message in MIME format.
--________boundary-level1
Content-Type: text/plain; charset=utf-8; format=flowed
Content-Transfer-Encoding: 8bit
This message contains an attachment with non-ASCII chracters an a message-id with tricky characters.
--________boundary-level1
Content-Type: application/pdf;
name="=?UTF-8?B?44GU44GW44GE44G+44GZLnBkZg==?="
Content-Transfer-Encoding: base64
Content-Disposition: attachment;
filename*0*=UTF-8''%E3%81%94%E3%81%96%E3%81%84%E3%81%BE%E3%81%99%2E%70%64;
filename*1*=%66
JVBERi0xLjQNJeLjz9MNCjUgMCBvYmoNPDwvTGluZWFyaXplZCAxL0wgMTIzNzgxNi9PIDcv
--________boundary-level1--
Return-Path: sender@domain.tld
From: Ye Olde Sender <sender@domain.tld>
Subject: =?UTF-8?Q?Message_=c3=a0_urlizer?=
To: my list <test@lists.example.com>
Message-ID: <alt-nested@domain.tld>
Date: Thu, 19 Dec 2019 11:03:19 +0100
MIME-Version: 1.0
Content-type: multipart/mixed; boundary="________boundary-level1"
This is a multi-part attachment message with nested multipart/related and multipart/alternative.
--________boundary-level1
Content-Type: multipart/related;
boundary="________boundary-level2";
type="multipart/alternative"
This is a multipart/related containing a multipart/alternative and an image
--________boundary-level2
Content-Type: multipart/alternative;
boundary="________boundary-level3"
This is a multipart/alternative containing a text/plain and a text/html.
--________boundary-level3
Content-Type: text/plain; charset="iso-8859-1"
Content-Transfer-Encoding: quoted-printable
This is the text/palin part.
--________boundary-level3
Content-Type: text/html; charset="iso-8859-1"
Content-Transfer-Encoding: quoted-printable
<html><head/><body>This is the text/html part.</body></html>
--________boundary-level3--
--________boundary-level2
Content-Type: image/jpeg; name="image.jpg"
Content-Description: image.jpg
Content-Disposition: inline; filename="image.jpg"; size=2236;
creation-date="Mon, 03 Feb 2020 08:18:35 GMT";
modification-date="Mon, 03 Feb 2020 08:18:35 GMT"
Content-ID: <image.jpg@01D5DA72.E8F8BD00>
Content-Transfer-Encoding: base64
/9j/4AAQSkZJRgABAQEAYABgAAD/2wBDAAoHBwkHBgoJCAkLCwoMDxkQDw4ODx4WFxIZJCAmJSMg
--________boundary-level2--
--________boundary-level1
Content-Type: application/pdf; name="document.pdf"
Content-Description: document.pdf
Content-Disposition: attachment; <