Archive.pm 13.3 KB
Newer Older
1
2
3
4
# -*- indent-tabs-mode: nil; -*-
# vim:ft=perl:et:sw=4
# $Id$

5
# Sympa - SYsteme de Multi-Postage Automatique
6
7
8
9
10
#
# Copyright (c) 1997, 1998, 1999 Institut Pasteur & Christophe Wolfhugel
# Copyright (c) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
# 2006, 2007, 2008, 2009, 2010, 2011 Comite Reseau des Universites
# Copyright (c) 2011, 2012, 2013, 2014 GIP RENATER
11
12
13
14
15
16
17
18
19
20
21
22
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
23
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
root's avatar
root committed
24
25
26

package Archive;

27
use strict;
28
use warnings;
sikeda's avatar
sikeda committed
29
use Cwd qw();
30
use Digest::MD5 qw();
sikeda's avatar
sikeda committed
31
32
use Encode qw();
use HTML::Entities qw();
33

34
use Conf;
root's avatar
root committed
35
use Log;
36
37
use Message;
use tools;
root's avatar
root committed
38

39
my $serial_number = 0;    # incremented on each archived mail
root's avatar
root committed
40
41
42
43
44
45

## RCS identification.

## Does the real job : stores the message given as an argument into
## the indicated directory.

sympa-authors's avatar
sympa-authors committed
46
sub store_last {
47
48
    my ($list, $msg) = @_;

49
    Log::do_log('debug2', '');
50
51
52

    my ($filename, $newfile);

sympa-authors's avatar
sympa-authors committed
53
    return unless $list->is_archived();
54
55
    my $dir = $list->{'dir'} . '/archives';

root's avatar
root committed
56
    ## Create the archive directory if needed
57
    mkdir($dir, "0775") if !(-d $dir);
root's avatar
root committed
58
    chmod 0774, $dir;
59

root's avatar
root committed
60
61
    ## erase the last  message and replace it by the current one
    open(OUT, "> $dir/last_message");
62
63
64
65
    if (ref($msg)) {
        $msg->print(\*OUT);
    } else {
        print OUT $msg;
salaun's avatar
salaun committed
66
    }
root's avatar
root committed
67
    close(OUT);
68

root's avatar
root committed
69
70
71
72
73
74
}

## Lists the files included in the archive, preformatted for printing
## Returns an array.
sub list {
    my $name = shift;
sympa-authors's avatar
   
sympa-authors committed
75

76
    Log::do_log('debug', '(%s)', $name);
77
78
79

    my ($filename, $newfile);
    my (@l,        $i);
sympa-authors's avatar
   
sympa-authors committed
80

root's avatar
root committed
81
    unless (-d "$name") {
82
        Log::do_log('warning', '(%s) Failed, no directory %s', $name, $name);
83
#      @l = ($msg::no_archives_available);
84
85
        return @l;
    }
root's avatar
root committed
86
    unless (opendir(DIR, "$name")) {
87
88
        Log::do_log('warning', '(%s) Failed, cannot open directory %s',
            $name, $name);
89
#	@l = ($msg::no_archives_available);
90
91
92
93
94
95
96
97
        return @l;
    }
    foreach $i (sort readdir(DIR)) {
        next if ($i =~ /^\./o);
        next unless ($i =~ /^\d\d\d\d\-\d\d$/);
        my (@s) = stat("$name/$i");
        my $a = localtime($s[9]);
        push(@l, sprintf("%-40s %7d   %s\n", $i, $s[7], $a));
root's avatar
root committed
98
99
100
101
    }
    return @l;
}

sympa-authors's avatar
   
sympa-authors committed
102
sub scan_dir_archive {
103
104
105

    my ($dir, $month) = @_;

106
    Log::do_log('info', '(%s, %s)', $dir, $month);
107
108
109
110
111
112

    unless (opendir(DIR, "$dir/$month/arctxt")) {
        Log::do_log('info',
            "archive::scan_dir_archive($dir, $month): unable to open dir $dir/$month/arctxt"
        );
        return undef;
sympa-authors's avatar
   
sympa-authors committed
113
    }
114

sympa-authors's avatar
   
sympa-authors committed
115
    my $all_msg = [];
116
    my $i       = 0;
sympa-authors's avatar
   
sympa-authors committed
117
    foreach my $file (sort readdir(DIR)) {
118
119
120
121
122
123
124
125
126
127
128
129
130
131
        next unless ($file =~ /^\d+$/);
        Log::do_log('debug',
            "archive::scan_dir_archive($dir, $month): start parsing message $dir/$month/arctxt/$file"
        );

        my $mail = Message->new(
            {   'file'       => "$dir/$month/arctxt/$file",
                'noxsympato' => 'noxsympato'
            }
        );
        unless (defined $mail) {
            Log::do_log('err', 'Unable to create Message object %s', $file);
            return undef;
        }
sympa-authors's avatar
   
sympa-authors committed
132

133
        Log::do_log('debug', 'MAIL object: %s', $mail);
sympa-authors's avatar
   
sympa-authors committed
134

135
136
137
        $i++;
        my $msg = {};
        $msg->{'id'} = $i;
sympa-authors's avatar
   
sympa-authors committed
138

139
140
141
        $msg->{'subject'} = tools::decode_header($mail, 'Subject');
        $msg->{'from'}    = tools::decode_header($mail, 'From');
        $msg->{'date'}    = tools::decode_header($mail, 'Date');
sympa-authors's avatar
   
sympa-authors committed
142

143
        $msg->{'full_msg'} = $mail->{'msg'}->as_string;
sympa-authors's avatar
   
sympa-authors committed
144

145
146
        Log::do_log('debug', 'Adding message %s in archive to send',
            $msg->{'subject'});
sympa-authors's avatar
   
sympa-authors committed
147

148
        push @{$all_msg}, $msg;
sympa-authors's avatar
   
sympa-authors committed
149
    }
150
    closedir DIR;
sympa-authors's avatar
   
sympa-authors committed
151
152
153
154
155

    return $all_msg;
}

#####################################################
156
#  search_msgid
sympa-authors's avatar
   
sympa-authors committed
157
####################################################
158
#
sympa-authors's avatar
   
sympa-authors committed
159
# find a message in archive specified by arcpath and msgid
160
#
sympa-authors's avatar
   
sympa-authors committed
161
162
163
164
# IN : arcpath and msgid
#
# OUT : undef | #message in arctxt
#
165
####################################################
sympa-authors's avatar
   
sympa-authors committed
166
167
168

sub search_msgid {

169
170
    my ($dir, $msgid) = @_;

171
    Log::do_log('info', '(%s, %s)', $dir, $msgid);
172

sympa-authors's avatar
   
sympa-authors committed
173
    if ($msgid =~ /NO-ID-FOUND\.mhonarc\.org/) {
174
        Log::do_log('err', 'No message id found');
175
176
        return undef;
    }
sympa-authors's avatar
   
sympa-authors committed
177
    unless ($dir =~ /\d\d\d\d\-\d\d\/arctxt/) {
178
        Log::do_log('info', 'Dir %s look unproper', $dir);
179
        return undef;
sympa-authors's avatar
   
sympa-authors committed
180
    }
181
182
183
184
185
    unless (opendir(ARC, "$dir")) {
        Log::do_log('info',
            "archive::scan_dir_archive($dir, $msgid): unable to open dir $dir"
        );
        return undef;
sympa-authors's avatar
   
sympa-authors committed
186
    }
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
    chomp $msgid;

    foreach my $file (grep (!/\./, readdir ARC)) {
        next unless (open MAIL, "$dir/$file");
        while (<MAIL>) {
            last if /^$/;    #stop parse after end of headers
            if (/^Message-id:\s?<?([^>\s]+)>?\s?/i) {
                my $id = $1;
                if ($id eq $msgid) {
                    close MAIL;
                    closedir ARC;
                    return $file;
                }
            }
        }
        close MAIL;
sympa-authors's avatar
   
sympa-authors committed
203
204
205
206
207
    }
    closedir ARC;
    return undef;
}

root's avatar
root committed
208
sub exist {
209
    my ($name, $file) = @_;
root's avatar
root committed
210
    my $fn = "$name/$file";
211

root's avatar
root committed
212
213
214
215
    return $fn if (-r $fn && -f $fn);
    return undef;
}

sympa-authors's avatar
sympa-authors committed
216
217
# return path for latest message distributed in the list
sub last_path {
218

sympa-authors's avatar
sympa-authors committed
219
220
    my $list = shift;

221
    Log::do_log('debug', '(%s)', $list->{'name'});
222

sympa-authors's avatar
sympa-authors committed
223
    return undef unless ($list->is_archived());
224
    my $file = $list->{'dir'} . '/archives/last_message';
sympa-authors's avatar
sympa-authors committed
225

226
227
    return ($list->{'dir'} . '/archives/last_message')
        if (-f $list->{'dir'} . '/archives/last_message');
sympa-authors's avatar
sympa-authors committed
228
    return undef;
root's avatar
root committed
229

sympa-authors's avatar
sympa-authors committed
230
}
231
232
233
234
235
236

## Load an archived message, returns the mhonarc metadata
## IN : file_path
sub load_html_message {
    my %parameters = @_;

237
    Log::do_log('debug2', $parameters{'file_path'});
238
239
240
    my %metadata;

    unless (open ARC, $parameters{'file_path'}) {
241
242
        Log::do_log(
            'err',
243
            'Failed to load message "%s": %m',
244
245
246
            $parameters{'file_path'}
        );
        return undef;
247
248
249
    }

    while (<ARC>) {
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
        last if /^\s*$/;    ## Metadata end with an emtpy line

        if (/^<!--(\S+): (.*) -->$/) {
            my ($key, $value) = ($1, $2);
            $value =
                Encode::encode_utf8(
                HTML::Entities::decode_entities(Encode::decode_utf8($value)));
            if ($key eq 'X-From-R13') {
                $metadata{'X-From'} = $value;
                ## Mhonarc protection of email addresses
                $metadata{'X-From'} =~ tr/N-Z[@A-Mn-za-m/@A-Z[a-z/;
                $metadata{'X-From'} =~ s/^.*<(.*)>/$1/g;   ## Remove the gecos
            }
            $metadata{$key} = $value;
        }
265
266
267
    }

    close ARC;
268

269
270
271
    return \%metadata;
}

272
273
sub clean_archive_directory {
    Log::do_log('debug2', '(%s, %s)', @_);
274
    my $robot          = shift;
275
276
    my $dir_to_rebuild = shift;

sikeda's avatar
sikeda committed
277
    my $arc_root = Conf::get_robot_conf($robot, 'arc_path');
278
279
280
281
    my $answer;
    $answer->{'dir_to_rebuild'} = $arc_root . '/' . $dir_to_rebuild;
    $answer->{'cleaned_dir'} = $Conf::Conf{'tmpdir'} . '/' . $dir_to_rebuild;
    unless (
sikeda's avatar
sikeda committed
282
        my $number_of_copies = tools::copy_dir(
283
            $answer->{'dir_to_rebuild'}, $answer->{'cleaned_dir'}
284
285
        )
        ) {
sikeda's avatar
sikeda committed
286
        Log::do_log(
287
            'err',
288
            'Unable to create a temporary directory where to store files for HTML escaping (%s). Cancelling',
289
290
291
292
293
294
295
296
297
298
299
300
301
            $number_of_copies
        );
        return undef;
    }
    if (opendir ARCDIR, $answer->{'cleaned_dir'}) {
        my $files_left_uncleaned = 0;
        foreach my $file (readdir(ARCDIR)) {
            next if ($file =~ /^\./);
            $files_left_uncleaned++
                unless Archive::clean_archived_message($robot, $file, $file);
        }
        closedir DIR;
        if ($files_left_uncleaned) {
sikeda's avatar
sikeda committed
302
            Log::do_log('err',
303
                'HTML cleaning failed for %s files in the directory %s',
304
305
306
307
                $files_left_uncleaned, $answer->{'dir_to_rebuild'});
        }
        $answer->{'dir_to_rebuild'} = $answer->{'cleaned_dir'};
    } else {
sikeda's avatar
sikeda committed
308
        Log::do_log(
309
310
311
312
            'err',
            'Unable to open directory %s: %s',
            $answer->{'dir_to_rebuild'}, $!
        );
sikeda's avatar
sikeda committed
313
        tools::del_dir($answer->{'cleaned_dir'});
314
315
316
317
        return undef;
    }
    return $answer;
}
318

319
sub clean_archived_message {
320
    Log::do_log('debug2', '(%s, %s, %s)', @_);
321
322
    my $robot  = shift;
    my $input  = shift;
323
324
325
    my $output = shift;

    my $msg;
326
    unless ($msg = Message->new({'file' => $input, 'noxsympato' => 1})) {
327
328
329
330
        Log::do_log('err', 'Unable to create a Message object with file %s',
            $input);
        return undef;
    }
331

332
333
334
335
336
337
    if ($msg->clean_html($robot)) {
        if (open TMP, ">$output") {
            print TMP $msg->{'msg'}->as_string;
            close TMP;
            return 1;
        } else {
338
339
            Log::do_log(
                'err',
340
                'Unable to create a tmp file to write clean HTML to file %s',
341
342
                $output
            );
343
344
345
            return undef;
        }
    } else {
346
        Log::do_log('err', 'HTML cleaning in file %s failed', $output);
347
        return undef;
348
349
    }
}
root's avatar
root committed
350

351
###########################
352
# convert a message to HTML.
353
354
#    result is stored in $destination_dir
#    attachement_url is used to link attachement
355
#
356
357
358
# NOTE: This might be moved to Site package as a mutative method.
# NOTE: convert_single_msg_2_html() was deprecated.
sub convert_single_message {
359
360
361
    my $that    = shift;    # List or Robot object
    my $message = shift;    # Message object or hashref
    my %opts    = @_;
362
363
364
365
366
367

    my $list;
    my $robot;
    my $listname;
    my $hostname;
    if (ref $that eq 'List') {
368
369
370
371
372
373
374
375
376
        $list     = $that;
        $robot    = $that->{'domain'};
        $listname = $that->{'name'};
        $hostname = $that->{'admin'}{'host'};
    } elsif (!ref($that) and $that and $that ne '*') {
        $list     = '';
        $robot    = $that;
        $listname = '';
        $hostname = Conf::get_robot_conf($that, 'host');
377
    } else {
378
        die 'bug in logic.  Ask developer';
379
380
381
382
    }

    my $msg_as_string;
    if (ref $message eq 'Message') {
383
        $msg_as_string = $message->{'msg_as_string'};
384
    } elsif (ref $message eq 'HASH') {
385
        $msg_as_string = $message->{'messageasstring'};
386
    } else {
387
        die 'bug in logic.  Ask developer';
388
    }
root's avatar
root committed
389

390
391
392
    my $destination_dir = $opts{'destination_dir'};
    my $attachement_url = $opts{'attachement_url'};

393
394
    my $mhonarc_ressources =
        tools::search_fullpath($that, 'mhonarc-ressources.tt2');
395
    unless ($mhonarc_ressources) {
396
397
        Log::do_log('notice', 'Cannot find any MhOnArc ressource file');
        return undef;
398
399
400
    }

    unless (-d $destination_dir) {
401
402
403
404
        unless (tools::mkdir_all($destination_dir, 0755)) {
            Log::do_log('err', 'Unable to create %s', $destination_dir);
            return undef;
        }
405
    }
406
407
408

    my $msg_file = $destination_dir . '/msg00000.txt';
    unless (open OUT, '>', $msg_file) {
409
410
        Log::do_log('notice', 'Could Not open %s', $msg_file);
        return undef;
411
    }
412
413
414
    print OUT $msg_as_string;
    close OUT;

415
    # mhonarc require du change workdir so this proc must retore it
sikeda's avatar
sikeda committed
416
    my $pwd = Cwd::getcwd();
417

418
419
    ## generate HTML
    unless (chdir $destination_dir) {
420
421
422
        Log::do_log('err', 'Could not change working directory to %s',
            $destination_dir);
        return undef;
423
424
    }

425
    my $tag      = get_tag($that);
426
    my $exitcode = system(
427
        Conf::get_robot_conf($robot, 'mhonarc'), '-single',
428
429
430
431
        '-rcfile' => $mhonarc_ressources,
        '-definevars' =>
            "listname='$listname' hostname=$hostname tag=$tag yyyy='' mois=''",
        '-outdir'        => $destination_dir,
432
433
434
435
436
        '-attachmentdir' => $destination_dir,
        '-attachmenturl' => $attachement_url,
        '-umask'         => $Conf::Conf{'umask'},
        '-stdout'        => "$destination_dir/msg00000.html",
        '--', $msg_file
437
    ) >> 8;
438

439
440
    # restore current wd
    chdir $pwd;
441

442
    if ($exitcode) {
443
444
445
446
447
        Log::do_log(
            'err',
            'Command %s failed with exit code %d',
            Conf::get_robot_conf($robot, 'mhonarc'), $exitcode
        );
448
449
    }

450
451
452
    return 1;
}

453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
=head2 sub get_tag(OBJECT $that)

Returns a tag derived from the listname.

=head3 Arguments 

=over 

=item * I<$that>, a List or Robot object.

=back 

=head3 Return 

=over 

=item * I<a character string>, corresponding to the 10 last characters of a 32 bytes string containing the MD5 digest of the concatenation of the following strings (in this order):

=over 4

=item - the cookie config parameter

=item - a slash: "/"

=item - name attribute of the I<$that> argument

=back 

=back

=head3 Calls 

=over 

=item * Digest::MD5::md5_hex

=back 

=cut 

sub get_tag {
    my $that = shift;

    my $name;
    if (ref $that eq 'List') {
498
499
500
501
502
        $name = $that->{'name'};
    } elsif (!ref($that) and $that and $that ne '*') {
        $name = $that;
    } elsif (!ref($that)) {
        $name = '*';
503
504
    }

505
506
507
    return
        substr(Digest::MD5::md5_hex(join '/', $Conf::Conf{'cookie'}, $name),
        -10);
508
509
}

510
1;