Commit 98df2840 authored by VERDIN David's avatar VERDIN David
Browse files

Adding test set for Message.pm. Only used to test urlize for now:

  - t/Message.t,
  - t/samples/ contains six different messages for testing urlize,
  - t/data/ now contains a test list configuration file.
parent e75e0859
#!/usr/bin/perl
# -*- indent-tabs-mode: nil; -*-
# vim:ft=perl:et:sw=4
# $Id: tools_data.t 8606 2013-02-06 08:44:02Z rousse $
use strict;
use warnings;
use Data::Dumper;
use English;
use File::Path qw(make_path rmtree);
use File::Copy::Recursive qw(fcopy rcopy dircopy fmove rmove dirmove);
use FindBin qw($Bin);
use lib "$Bin/../src/lib";
use Test::More;
BEGIN {
use_ok('Sympa::Message');
}
# Scratch locations for this test. They all live below $tmp_dir, which the
# script recreates before the run and removes once it is finished.
my $tmp_dir        = 't/tmp';
my $db_dir         = "$tmp_dir/db";
my $home_dir       = "$tmp_dir/list_data";
my $etc_dir        = "$tmp_dir/etc";
my $test_list_name = 'test';

# Fill the global configuration hash by hand instead of loading a
# configuration file. All paths point into the scratch tree above.
%Conf::Conf = (
    domain                => 'lists.example.com',    # mandatory
    listmaster            => 'dude@example.com',     # mandatory
    lang                  => 'en-US',
    sender_headers        => 'From',
    tmpdir                => $tmp_dir,
    db_type               => 'SQLite',
    db_name               => "$db_dir/message-test-db.sqlite",
    update_db_field_types => 'auto',
    home                  => $home_dir,
    etc                   => $etc_dir,
    cache_list_config     => '',
    supported_lang        => 'en-US',
    filesystem_encoding   => 'utf-8',
    urlize_min_size       => 0,
);
# --- Test environment setup ------------------------------------------------
# Start from an empty scratch tree so that leftovers from an earlier run
# cannot influence the results.
if (-d $tmp_dir) {
    rmtree($tmp_dir);
}
make_path($tmp_dir);
make_path($db_dir);
make_path($home_dir);

# Copy the list fixtures into the scratch "home". Stop right away if the copy
# fails: every later step needs the test list to be present.
dircopy('t/data/list_data/', $home_dir)
    or die "Can't copy 't/data/list_data/' to '$home_dir': $!\n";
make_path($etc_dir);

# NOTE(review): presumably limits what the logger prints on stderr to errors;
# confirm against Sympa::Log.
my $log = Sympa::Log->instance;
$log->{log_to_stderr} = 'err';

# Replace any previous database file with a new, empty one.
if (-f $Conf::Conf{db_name}) {
    unlink $Conf::Conf{db_name};
}
open my $fileHandle, ">", "$Conf::Conf{db_name}"
    or die "Can't create '$Conf::Conf{db_name}': $!\n";
close $fileHandle
    or die "Can't close '$Conf::Conf{db_name}': $!\n";

# Instantiate the database manager and probe the database.
# $sdm is not used again in the visible part of this script.
my $sdm = Sympa::DatabaseManager->instance;
Sympa::DatabaseManager::probe_db();

# Load the test list ('*' is passed as the second constructor argument, as
# before) and update its database record.
my $list = Sympa::List->new($test_list_name, '*');
die "Can't load test list '$test_list_name'\n" unless $list;
$list->_update_list_db;

# URL prefix under which the urlized attachments of the test list are
# expected to be referenced in the rewritten message.
my $root_url = '/attach/test/';
# Urlize scenarios, one hash per sample message:
#   test_case       - label used in the test descriptions
#   filename        - sample message fed to the urlize code
#   attachments     - files expected in the urlized directory; "name" is the
#                     file name on disk, "escaped_name" the name looked for
#                     in the URL inserted into the message
#   dirname         - expected name of the urlized directory
#   escaped_dirname - that directory name as looked for in the URL
my @to_urlize = (
    {   test_case   => 'simple',
        filename    => 't/samples/urlize-simple.eml',
        attachments => [
            { name => 'attachment.pdf', escaped_name => 'attachment.pdf' },
        ],
        dirname         => 'simple@example.com',
        escaped_dirname => 'simple%40example.com',
    },
    {   test_case   => 'simple with several attachments',
        filename    => 't/samples/urlize-simple-mutiple-attachments.eml',
        attachments => [
            { name => 'attachment.pdf', escaped_name => 'attachment.pdf' },
            { name => 'text.txt',       escaped_name => 'text.txt' },
            { name => 'image.png',      escaped_name => 'image.png' },
        ],
        dirname         => 'simple@example.com',
        escaped_dirname => 'simple%40example.com',
    },
    {   test_case   => 'encoding',
        filename    => 't/samples/urlize-encoding.eml',
        attachments => [
            {   name         => 'ございます.pdf',
                escaped_name => '%25e3%2581%2594%25e3%2581%259',
            },
        ],
        dirname         => 'globuz_24_3c_3e_25@example.com',
        escaped_dirname => 'globuz_24_3c_3e_25%40example.com',
    },
    {   test_case   => 'nested in multipart/mixed message',
        filename    => 't/samples/urlize-nested-mixed.eml',
        attachments => [
            {   name         => 'Würzburg.txt',
                escaped_name => 'W%25c3%25bcrzburg.txt',
            },
        ],
        dirname         => '3_24@domain.tld',
        escaped_dirname => '3_24%40domain.tld',
    },
    {   test_case   => 'nested in multipart/alternative message',
        filename    => 't/samples/urlize-nested-alternative.eml',
        attachments => [
            { name => 'globuz.pdf', escaped_name => 'globuz.pdf' },
        ],
        dirname         => '4_24@domain.tld',
        escaped_dirname => '4_24%40domain.tld',
    },
    {   test_case   => 'Deep nested message',
        filename    => 't/samples/urlize-deep-nested-mixed.eml',
        attachments => [
            {   name         => 'Würzburg.txt',
                escaped_name => 'W%25c3%25bcrzburg.txt',
            },
            { name => 'msg.1.bin', escaped_name => 'msg.1.bin' },
        ],
        dirname         => 'deep-nested@domain.tld',
        escaped_dirname => 'deep-nested%40domain.tld',
    },
);
# Run every urlize scenario. For each sample message:
#   1. read it, parse it and pass the parsed entity to _urlize_parts();
#   2. check the name of the directory created below the list's "urlized"
#      directory;
#   3. check that every expected attachment is stored there and that its URL
#      appears in the rewritten message;
#   4. check that nothing else was stored, then remove the directory so the
#      next scenario starts with an empty "urlized" directory.
foreach my $test_file (@to_urlize) {
    my $label = 'Test case: '.$test_file->{test_case};

    my $to_urlize_file = $test_file->{filename};
    my $lock_fh = Sympa::LockedFile->new($to_urlize_file, -1, '+<')
        or die "Can't open sample message '$to_urlize_file'\n";
    my $to_urlize_string = do { local $RS; <$lock_fh> };
    my $to_urlize = Sympa::Message->new($to_urlize_string);

    my $parser = MIME::Parser->new;
    $parser->extract_nested_messages(0);
    $parser->extract_uuencode(1);
    $parser->output_to_core(1);
    $parser->tmp_dir($Conf::Conf{'tmpdir'});

    # Strip a leading Return-Path header field (folded continuation lines
    # included) before handing the message to the MIME parser.
    my $msg_string = $to_urlize->as_string;
    $msg_string =~ s/\AReturn-Path: (.*?)\n(?![ \t])//s;
    my $entity = $parser->parse_data($msg_string);
    my $new_entity = Sympa::Message::_urlize_parts($entity, $list, $to_urlize->{'message_id'});

    ### Preparation done. Actual testing starts here.

    # The first non-dot entry of the "urlized" directory is taken as the
    # directory created for this message (the previous scenario removed its
    # own directory).
    my $urlized_root = $home_dir.'/'.$test_list_name.'/urlized';
    my $urlized_directory;
    if (opendir my $dh, $urlized_root.'/') {
        ($urlized_directory) = grep { $_ !~ m{\A\.+\Z} } readdir $dh;
        closedir $dh;
    }
    else {
        diag("Can't read directory '$urlized_root/': $ERRNO");
    }
    is($urlized_directory, $test_file->{dirname}, $label.' - Directory where urlized parts are stored correctly escaped.');

    # When no directory was found the remaining checks still run (and fail)
    # against the "urlized" directory itself, without "uninitialized value"
    # warnings.
    my $urlized_path = $urlized_root.'/'.(defined $urlized_directory ? $urlized_directory : '');

    ok(! -f $urlized_path.'/msg.0.bin', $label.' - The text of the message has not been converted to binary attachment.');
    ok(! -f $urlized_path.'/msg.0.txt', $label.' - The text of the message has not been converted to text attachment.');

    # The rewritten message does not change between attachments: serialise
    # it once.
    my $urlized_message = $new_entity->as_string();

    my @expected_files;
    foreach my $file (@{$test_file->{attachments}}) {
        my $is_stored = -f $urlized_path.'/'.$file->{name};
        ok($is_stored, $label.' - The attachment '.$file->{name}.' has been stored on the filesystem.');
        push @expected_files, $file->{name} if $is_stored;

        # Literal substring search: the expected URL is data, not a pattern,
        # so characters such as "." must not act as regex metacharacters.
        my $expected_url = $root_url.$test_file->{escaped_dirname}.'/'.$file->{escaped_name};
        my $found_url_to_attachment = index($urlized_message, $expected_url) >= 0 ? 1 : 0;
        is($found_url_to_attachment, 1, $label.' - The attachment '.$file->{name}.' stored on the filesystem has an URL to retrieve it in the new message.');
    }

    # Everything actually present in the urlized directory, dot entries
    # excluded.
    my @found_files;
    if (opendir my $dh2, $urlized_path.'/') {
        @found_files = grep { $_ !~ m{\A\.+\Z} } readdir $dh2;
        closedir $dh2;
    }
    else {
        diag("Can't read directory '$urlized_path/': $ERRNO");
    }

    # Compare counts only: @expected_files holds just the names that exist on
    # disk, so equal counts mean no unexpected file was stored.
    my $total_expected_files = scalar @expected_files;
    is(scalar @found_files, $total_expected_files, $label.' - Found the urlized attachments (total: '.$total_expected_files.') and only them.');

    rmtree $urlized_path;
}

# Remove the whole scratch tree once all scenarios have run.
rmtree $tmp_dir;
done_testing();
merge_feature off
msg_topic_tagging optional
topics Computing
invite public
priority 0
review private
shared_doc
d_read owner
d_edit private
quota 150000000
available_user_options
reception digest,digestplain,html,mail,nomail,not_me,notice,summary,txt,urlize
owner
visibility noconceal
email owner@example.com
profile privileged
reception mail
unsubscribe owner
custom_subject test
archive
web_access private
mail_access owner
period month
reply_to_header
value sender
apply respect
status open
subject A test list
footer_type append
info conceal
subscribe owner
add auth
serial 230
send editorkeyonly
tracking
tracking default
delivery_status_notification on
retention_period 90
message_disposition_notification on
creation
date_epoch 1288885499
email owner@example.com
date 01 apr 1997 at 16:44:59
remind listmaster
process_archive on
del auth
rfc2369_header_fields archive,help,owner,post,unsubscribe
visibility conceal
update
date_epoch 1573046671
email owner@example.com
digest 0,1,2,3,4,5,6 2:50
max_size 12000000
\ No newline at end of file
Return-Path: sender@domain.tld
From: Ye Olde Sender <sender@domain.tld>
Subject: =?UTF-8?Q?Message_=c3=a0_urlizer?=
To: my list <test@lists.example.com>
Message-ID: <deep-nested@domain.tld>
Date: Thu, 19 Dec 2019 11:03:19 +0100
MIME-Version: 1.0
Content-type: multipart/mixed; boundary="________boundary-level1"
This is a multi-part attachment message with deeply nested multipart sub-parts.
--________boundary-level1
Content-Type: multipart/alternative;
boundary="________boundary-level2"
--________boundary-level2
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain;
charset=utf-8
A simple text part. It is alternative with a multipart/mixed part.
--________boundary-level2
Content-Type: multipart/mixed;
boundary="________boundary-level3"
--________boundary-level3
Content-Transfer-Encoding: quoted-printable
Content-Type: text/html;
charset=utf-8
<html>
<head></head>
<body>A simple HTML part. It is alongside with a text attachment and a
multipart/alternative that should both be urlized.</body>
</html>
--________boundary-level3
Content-Disposition: attachment;
filename="=?UTF-8?Q?W=c3=bcrzburg=2etxt?="
Content-Type: text/plain;
name="=?UTF-8?Q?W=c3=bcrzburg=2etxt?="
charset=us-ascii
Content-Transfer-Encoding: 8bit
This is a text file attached to the message. It should be urlized.
--________boundary-level3
Content-Type: multipart/alternative;
boundary="________boundary-level4"
--________boundary-level4
Content-Type: multipart/alternative;
boundary="________boundary-level5"
--________boundary-level5
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain;
charset=utf-8
A simple text part. The whole multipart/alternative part should be urlized.
--________boundary-level5
Content-Type: multipart/mixed;
boundary="________boundary-level6"
--________boundary-level6
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain;
charset=utf-8
A simple text part.
--________boundary-level6
Content-Type: application/png;
name="image.png"
Content-Transfer-Encoding: base64
Content-Disposition: attachment;
filename="image.png"
JVBERi0xLjQNJeLjz9MNCjUgMCBvYmoNPDwvTGluZWFyaXplZCAxL0wgMTIzNzgxNi9PIDcv
--________boundary-level6--
--________boundary-level5--
--________boundary-level4--
--________boundary-level3--
--________boundary-level2--
--________boundary-level1--
From: Ye Olde Sender <sender@domain.tld>
Subject: =?UTF-8?Q?Message_=c3=a0_urlizer?=
To: my list <test@lists.example.com>
Message-ID: <globuz$<>%@example.com>
Date: Thu, 19 Dec 2019 11:03:19 +0100
MIME-Version: 1.0
Content-Type: multipart/mixed;
boundary="________boundary-level1"
Content-Language: en-US
This is a multi-part message in MIME format.
--________boundary-level1
Content-Type: text/plain; charset=utf-8; format=flowed
Content-Transfer-Encoding: 8bit
This message contains an attachment with non-ASCII characters and a message-id with tricky characters.
--________boundary-level1
Content-Type: application/pdf;
name="=?UTF-8?B?44GU44GW44GE44G+44GZLnBkZg==?="
Content-Transfer-Encoding: base64
Content-Disposition: attachment;
filename*0*=UTF-8''%E3%81%94%E3%81%96%E3%81%84%E3%81%BE%E3%81%99%2E%70%64;
filename*1*=%66
JVBERi0xLjQNJeLjz9MNCjUgMCBvYmoNPDwvTGluZWFyaXplZCAxL0wgMTIzNzgxNi9PIDcv
--________boundary-level1--
Return-Path: sender@domain.tld
From: Ye Olde Sender <sender@domain.tld>
To: my list <test@lists.example.com>
Message-ID: <4$@domain.tld>
Date: Tue, 21 Jan 2020 17:23:53 +0100
MIME-Version: 1.0
Subject: A nice subject
Content-type: multipart/alternative;boundary="________boundary-level1"
This is a multi-part that should be "mixed" but that Apple mail made "alternative".
--________boundary-level1
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain;
charset=iso-8859-1
This a text/plain part, first member of the alternative
--________boundary-level1
Content-Type: multipart/mixed;
boundary="________boundary-level2"
--________boundary-level2
Content-Transfer-Encoding: quoted-printable
Content-Type: text/html;
charset=us-ascii
<html>
<head></head>
<body>An HTML part inside the multipart/mixed, second part of the alternative.</body>
</html>
--________boundary-level2
Content-Type: application/pdf;
name="globuz.pdf"
Content-Transfer-Encoding: base64
Content-Disposition: attachment;
filename="globuz.pdf"
ThisisaPDFattachement
--________boundary-level2--
--________boundary-level1--
Return-Path: sender@domain.tld
From: Ye Olde Sender <sender@domain.tld>
Subject: =?UTF-8?Q?Message_=c3=a0_urlizer?=
To: my list <test@lists.example.com>
Message-ID: <3$@domain.tld>
Date: Thu, 19 Dec 2019 11:03:19 +0100
MIME-Version: 1.0
Content-type: multipart/mixed; boundary="________boundary-level1"
This is a normal multi-part attachment message whose main type is multipart/mixed.
It contains a multipart/mixed sub-part with an attachment
--________boundary-level1
Content-Type: multipart/alternative;
boundary="________boundary-level2"
--________boundary-level2
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain;
charset=utf-8
A simple text part. It is alternative with a multipart/mixed part.
--________boundary-level2
Content-Type: multipart/mixed;
boundary="________boundary-level3"
--________boundary-level3
Content-Transfer-Encoding: quoted-printable
Content-Type: text/html;
charset=utf-8
<html>
<head></head>
<body>A simple HTML part. Only this part contains the attachment. It should not but it happens anyway.</body>
</html>
--________boundary-level3
Content-Disposition: attachment;
filename="=?UTF-8?Q?W=c3=bcrzburg=2etxt?="
Content-Type: text/plain;
name="=?UTF-8?Q?W=c3=bcrzburg=2etxt?="
charset=us-ascii
Content-Transfer-Encoding: 8bit
This is a text file attached to the message. It should be urlized.
--________boundary-level3--
--________boundary-level2--
--________boundary-level1--
From: Ye Olde Sender <sender@domain.tld>
Subject: Nested message 1
To: my list <test@lists.example.com>
Message-ID: <simple@example.com>
Date: Wed, 8 Jan 2020 12:27:55 +0100
MIME-Version: 1.0
Content-Type: multipart/mixed;
boundary="________boundary-level1"
Content-Language: en-US
This message is a normal multipart/mixed message without nested multiparts.
--________boundary-level1
Content-Type: multipart/alternative;
boundary="________boundary-level2"
--________boundary-level2
Content-Type: text/plain; charset=utf-8; format=flowed
Content-Transfer-Encoding: 8bit
A Simple text part
--________boundary-level2
Content-Type: text/html; charset=utf-8
Content-Transfer-Encoding: 8bit
<html>
<head></head>
<body>A simple HTML part.</body>
</html>
--________boundary-level2--
--________boundary-level1
Content-Type: application/pdf;
name="attachment.pdf"
Content-Transfer-Encoding: base64
Content-Disposition: attachment;
filename="attachment.pdf"
JVBERi0xLjQNJeLjz9MNCjUgMCBvYmoNPDwvTGluZWFyaXplZCAxL0wgMTIzNzgxNi9PIDcv
--________boundary-level1
Content-Disposition: attachment;
filename="text.txt"
Content-Type: text/plain;
name="text.txt"
charset=us-ascii
Content-Transfer-Encoding: 8bit
This is a text file attached to the message. It should be urlized.
--________boundary-level1--
From: Ye Olde Sender <sender@domain.tld>
Subject: Nested message 1
To: my list <test@lists.example.com>
Message-ID: <simple@example.com>
Date: Wed, 8 Jan 2020 12:27:55 +0100
MIME-Version: 1.0
Content-Type: multipart/mixed;
boundary="________boundary-level1"
Content-Language: en-US
This message is a normal multipart/mixed message with multiple attachments.
--________boundary-level1
Content-Type: multipart/alternative;
boundary="________boundary-level2"
--________boundary-level2
Content-Type: text/plain; charset=utf-8; format=flowed
Content-Transfer-Encoding: 8bit
A Simple text part
--________boundary-level2
Content-Type: text/html; charset=utf-8
Content-Transfer-Encoding: 8bit
<html>
<head></head>
<body>A simple HTML part.</body>
</html>
--________boundary-level2--
--________boundary-level1
Content-Type: application/pdf;
name="attachment.pdf"
Content-Transfer-Encoding: base64