Archive.pm 13.5 KB
Newer Older
1
2
3
4
# -*- indent-tabs-mode: nil; -*-
# vim:ft=perl:et:sw=4
# $Id$

5
# Sympa - SYsteme de Multi-Postage Automatique
6
7
8
9
#
# Copyright (c) 1997, 1998, 1999 Institut Pasteur & Christophe Wolfhugel
# Copyright (c) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
# 2006, 2007, 2008, 2009, 2010, 2011 Comite Reseau des Universites
10
# Copyright (c) 2011, 2012, 2013, 2014, 2015 GIP RENATER
11
12
13
14
15
16
17
18
19
20
21
22
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
23
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
root's avatar
root committed
24

25
package Sympa::Archive;
root's avatar
root committed
26

27
use strict;
28
use warnings;
sikeda's avatar
sikeda committed
29
use Cwd qw();
30
use Digest::MD5 qw();
sikeda's avatar
sikeda committed
31
32
use Encode qw();
use HTML::Entities qw();
33

34
use Conf;
root's avatar
root committed
35
use Log;
36
use Sympa::Message;
37
use tools;
38
use Sympa::Tools::File;
root's avatar
root committed
39

40
my $serial_number = 0;    # incremented on each archived mail
root's avatar
root committed
41
42
43
44
45
46

## RCS identification.

## Does the real job : stores the message given as an argument into
## the indicated directory.

sympa-authors's avatar
sympa-authors committed
47
sub store_last {
48
49
    my ($list, $msg) = @_;

50
    Log::do_log('debug2', '');
51

sympa-authors's avatar
sympa-authors committed
52
    return unless $list->is_archived();
53
54
    my $dir = $list->{'dir'} . '/archives';

root's avatar
root committed
55
    ## Create the archive directory if needed
56
    mkdir($dir, "0775") if !(-d $dir);
root's avatar
root committed
57
    chmod 0774, $dir;
58

root's avatar
root committed
59
60
    ## erase the last  message and replace it by the current one
    open(OUT, "> $dir/last_message");
61
62
63
64
    if (ref($msg)) {
        $msg->print(\*OUT);
    } else {
        print OUT $msg;
salaun's avatar
salaun committed
65
    }
root's avatar
root committed
66
    close(OUT);
67

root's avatar
root committed
68
69
70
71
72
73
}

## Lists the files included in the archive, preformatted for printing
## Returns an array.
sub list {
    my $name = shift;
sympa-authors's avatar
   
sympa-authors committed
74

75
    Log::do_log('debug', '(%s)', $name);
76

77
    my (@l, $i);
sympa-authors's avatar
   
sympa-authors committed
78

root's avatar
root committed
79
    unless (-d "$name") {
80
        Log::do_log('err', '(%s) Failed, no directory %s', $name, $name);
81
#      @l = ($msg::no_archives_available);
82
83
        return @l;
    }
root's avatar
root committed
84
    unless (opendir(DIR, "$name")) {
85
        Log::do_log('err', '(%s) Failed, cannot open directory %s',
86
            $name, $name);
87
#	@l = ($msg::no_archives_available);
88
89
90
91
92
93
94
95
        return @l;
    }
    foreach $i (sort readdir(DIR)) {
        next if ($i =~ /^\./o);
        next unless ($i =~ /^\d\d\d\d\-\d\d$/);
        my (@s) = stat("$name/$i");
        my $a = localtime($s[9]);
        push(@l, sprintf("%-40s %7d   %s\n", $i, $s[7], $a));
root's avatar
root committed
96
97
98
99
    }
    return @l;
}

sympa-authors's avatar
   
sympa-authors committed
100
sub scan_dir_archive {
101
102
    Log::do_log('debug3', '(%s, %s)', @_);
    my ($list, $month) = @_;
103

sikeda's avatar
sikeda committed
104
105
    my $dir =
        Conf::get_robot_conf($list->{'domain'}, 'arc_path') . '/'
106
        . $list->get_list_id();
107
108

    unless (opendir(DIR, "$dir/$month/arctxt")) {
109
        Log::do_log('info', 'Unable to open dir %s/%s/arctxt', $dir, $month);
110
        return undef;
sympa-authors's avatar
   
sympa-authors committed
111
    }
112

sympa-authors's avatar
   
sympa-authors committed
113
    my $all_msg = [];
114
    my $i       = 0;
sympa-authors's avatar
   
sympa-authors committed
115
    foreach my $file (sort readdir(DIR)) {
116
        next unless ($file =~ /^\d+$/);
117
118
        Log::do_log('debug', 'Start parsing message %s/%s/arctxt/%s',
            $dir, $month, $file);
119

sikeda's avatar
sikeda committed
120
        my $message =
sikeda's avatar
sikeda committed
121
            Sympa::Message->new_from_file("$dir/$month/arctxt/$file",
122
            context => $list);
123
124
125
        unless ($message) {
            Log::do_log('err', 'Unable to create Message object from file %s',
                $file);
126
127
            return undef;
        }
128
129
        # Decrypt message if possible
        $message->smime_decrypt;
sympa-authors's avatar
   
sympa-authors committed
130

131
        Log::do_log('debug', 'MAIL object: %s', $message);
sympa-authors's avatar
   
sympa-authors committed
132

133
134
135
        $i++;
        my $msg = {};
        $msg->{'id'} = $i;
sympa-authors's avatar
   
sympa-authors committed
136

137
        $msg->{'subject'} = $message->{'decoded_subject'};
sikeda's avatar
sikeda committed
138
139
        $msg->{'from'}    = $message->get_decoded_header('From');
        $msg->{'date'}    = $message->get_decoded_header('Date');
sympa-authors's avatar
   
sympa-authors committed
140

141
        $msg->{'full_msg'} = $message->as_string;
sympa-authors's avatar
   
sympa-authors committed
142

143
144
        Log::do_log('debug', 'Adding message %s in archive to send',
            $msg->{'subject'});
sympa-authors's avatar
   
sympa-authors committed
145

146
        push @{$all_msg}, $msg;
sympa-authors's avatar
   
sympa-authors committed
147
    }
148
    closedir DIR;
sympa-authors's avatar
   
sympa-authors committed
149
150
151
152
153

    return $all_msg;
}

#####################################################
154
#  search_msgid
sympa-authors's avatar
   
sympa-authors committed
155
####################################################
156
#
sympa-authors's avatar
   
sympa-authors committed
157
# find a message in archive specified by arcpath and msgid
158
#
sympa-authors's avatar
   
sympa-authors committed
159
160
161
162
# IN : arcpath and msgid
#
# OUT : undef | #message in arctxt
#
163
####################################################
sympa-authors's avatar
   
sympa-authors committed
164
165
166

sub search_msgid {

167
168
    my ($dir, $msgid) = @_;

169
    Log::do_log('info', '(%s, %s)', $dir, $msgid);
170

sympa-authors's avatar
   
sympa-authors committed
171
    if ($msgid =~ /NO-ID-FOUND\.mhonarc\.org/) {
172
        Log::do_log('err', 'No message id found');
173
174
        return undef;
    }
sympa-authors's avatar
   
sympa-authors committed
175
    unless ($dir =~ /\d\d\d\d\-\d\d\/arctxt/) {
176
        Log::do_log('info', 'Dir %s look unproper', $dir);
177
        return undef;
sympa-authors's avatar
   
sympa-authors committed
178
    }
179
180
181
182
183
    unless (opendir(ARC, "$dir")) {
        Log::do_log('info',
            "archive::scan_dir_archive($dir, $msgid): unable to open dir $dir"
        );
        return undef;
sympa-authors's avatar
   
sympa-authors committed
184
    }
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
    chomp $msgid;

    foreach my $file (grep (!/\./, readdir ARC)) {
        next unless (open MAIL, "$dir/$file");
        while (<MAIL>) {
            last if /^$/;    #stop parse after end of headers
            if (/^Message-id:\s?<?([^>\s]+)>?\s?/i) {
                my $id = $1;
                if ($id eq $msgid) {
                    close MAIL;
                    closedir ARC;
                    return $file;
                }
            }
        }
        close MAIL;
sympa-authors's avatar
   
sympa-authors committed
201
202
203
204
205
    }
    closedir ARC;
    return undef;
}

root's avatar
root committed
206
sub exist {
207
    my ($name, $file) = @_;
root's avatar
root committed
208
    my $fn = "$name/$file";
209

root's avatar
root committed
210
211
212
213
    return $fn if (-r $fn && -f $fn);
    return undef;
}

sympa-authors's avatar
sympa-authors committed
214
215
# return path for latest message distributed in the list
sub last_path {
216
    Log::do_log('debug', '(%s)', @_);
sympa-authors's avatar
sympa-authors committed
217
218
    my $list = shift;

219
    return undef unless $list->is_archived();
220

221
    my $file = $list->{'dir'} . '/archives/last_message';
222
    return $file if -f $file;
sympa-authors's avatar
sympa-authors committed
223
224
225

    return undef;
}
226
227
228
229
230
231

## Load an archived message, returns the mhonarc metadata
## IN : file_path
sub load_html_message {
    my %parameters = @_;

232
    Log::do_log('debug2', $parameters{'file_path'});
233
234
235
    my %metadata;

    unless (open ARC, $parameters{'file_path'}) {
236
237
        Log::do_log(
            'err',
238
            'Failed to load message "%s": %m',
239
240
241
            $parameters{'file_path'}
        );
        return undef;
242
243
244
    }

    while (<ARC>) {
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
        last if /^\s*$/;    ## Metadata end with an emtpy line

        if (/^<!--(\S+): (.*) -->$/) {
            my ($key, $value) = ($1, $2);
            $value =
                Encode::encode_utf8(
                HTML::Entities::decode_entities(Encode::decode_utf8($value)));
            if ($key eq 'X-From-R13') {
                $metadata{'X-From'} = $value;
                ## Mhonarc protection of email addresses
                $metadata{'X-From'} =~ tr/N-Z[@A-Mn-za-m/@A-Z[a-z/;
                $metadata{'X-From'} =~ s/^.*<(.*)>/$1/g;   ## Remove the gecos
            }
            $metadata{$key} = $value;
        }
260
261
262
    }

    close ARC;
263

264
265
266
    return \%metadata;
}

267
268
sub clean_archive_directory {
    Log::do_log('debug2', '(%s, %s)', @_);
269
    my $robot          = shift;
270
271
    my $dir_to_rebuild = shift;

sikeda's avatar
sikeda committed
272
    my $arc_root = Conf::get_robot_conf($robot, 'arc_path');
273
274
275
276
    my $answer;
    $answer->{'dir_to_rebuild'} = $arc_root . '/' . $dir_to_rebuild;
    $answer->{'cleaned_dir'} = $Conf::Conf{'tmpdir'} . '/' . $dir_to_rebuild;
    unless (
277
278
279
        my $number_of_copies = Sympa::Tools::File::copy_dir(
            $answer->{'dir_to_rebuild'},
            $answer->{'cleaned_dir'}
280
281
        )
        ) {
sikeda's avatar
sikeda committed
282
        Log::do_log(
283
            'err',
284
            'Unable to create a temporary directory where to store files for HTML escaping (%s). Cancelling',
285
286
287
288
289
290
291
292
293
            $number_of_copies
        );
        return undef;
    }
    if (opendir ARCDIR, $answer->{'cleaned_dir'}) {
        my $files_left_uncleaned = 0;
        foreach my $file (readdir(ARCDIR)) {
            next if ($file =~ /^\./);
            $files_left_uncleaned++
294
295
296
297
298
                unless clean_archived_message(
                $robot, undef,
                $answer->{'cleaned_dir'} . '/' . $file,
                $answer->{'cleaned_dir'} . '/' . $file
                );
299
300
301
        }
        closedir DIR;
        if ($files_left_uncleaned) {
sikeda's avatar
sikeda committed
302
            Log::do_log('err',
303
                'HTML cleaning failed for %s files in the directory %s',
304
305
306
307
                $files_left_uncleaned, $answer->{'dir_to_rebuild'});
        }
        $answer->{'dir_to_rebuild'} = $answer->{'cleaned_dir'};
    } else {
sikeda's avatar
sikeda committed
308
        Log::do_log(
309
            'err',
310
311
            'Unable to open directory %s: %m',
            $answer->{'dir_to_rebuild'}
312
        );
313
        Sympa::Tools::File::del_dir($answer->{'cleaned_dir'});
314
315
316
317
        return undef;
    }
    return $answer;
}
318

319
sub clean_archived_message {
320
    Log::do_log('debug2', '(%s, %s, %s)', @_);
321
    my $robot  = shift;
sikeda's avatar
sikeda committed
322
    my $list   = shift;
323
    my $input  = shift;
324
325
    my $output = shift;

326
327
    my $message =
        Sympa::Message->new_from_file($input, context => ($list || $robot),);
328
    unless ($message) {
329
330
331
332
        Log::do_log('err', 'Unable to create a Message object with file %s',
            $input);
        return undef;
    }
333

334
    if ($message->clean_html) {
335
336
        if (open TMP, '>', $output) {
            print TMP $message->as_string;
337
338
339
            close TMP;
            return 1;
        } else {
340
341
            Log::do_log(
                'err',
342
                'Unable to create a tmp file to write clean HTML to file %s',
343
344
                $output
            );
345
346
347
            return undef;
        }
    } else {
348
        Log::do_log('err', 'HTML cleaning in file %s failed', $output);
349
        return undef;
350
351
    }
}
root's avatar
root committed
352

353
###########################
354
# convert a message to HTML.
355
356
#    result is stored in $destination_dir
#    attachement_url is used to link attachement
357
#
358
359
360
# NOTE: This might be moved to Site package as a mutative method.
# NOTE: convert_single_msg_2_html() was deprecated.
sub convert_single_message {
361
362
363
    my $that    = shift;    # List or Robot object
    my $message = shift;    # Message object or hashref
    my %opts    = @_;
364
365
366
367
368

    my $list;
    my $robot;
    my $listname;
    my $hostname;
369
    if (ref $that eq 'Sympa::List') {
370
371
372
373
374
375
376
377
378
        $list     = $that;
        $robot    = $that->{'domain'};
        $listname = $that->{'name'};
        $hostname = $that->{'admin'}{'host'};
    } elsif (!ref($that) and $that and $that ne '*') {
        $list     = '';
        $robot    = $that;
        $listname = '';
        $hostname = Conf::get_robot_conf($that, 'host');
379
    } else {
380
        die 'bug in logic.  Ask developer';
381
382
383
    }

    my $msg_as_string;
384
    if (ref $message eq 'Sympa::Message') {
385
        $msg_as_string = $message->as_string;
386
    } elsif (ref $message eq 'HASH') {
387
        $msg_as_string = $message->{'messageasstring'};
388
    } else {
389
        die 'bug in logic.  Ask developer';
390
    }
root's avatar
root committed
391

392
393
394
    my $destination_dir = $opts{'destination_dir'};
    my $attachement_url = $opts{'attachement_url'};

395
396
    my $mhonarc_ressources =
        tools::search_fullpath($that, 'mhonarc-ressources.tt2');
397
    unless ($mhonarc_ressources) {
398
399
        Log::do_log('notice', 'Cannot find any MhOnArc ressource file');
        return undef;
400
401
402
    }

    unless (-d $destination_dir) {
403
        unless (Sympa::Tools::File::mkdir_all($destination_dir, 0755)) {
404
405
406
            Log::do_log('err', 'Unable to create %s', $destination_dir);
            return undef;
        }
407
    }
408
409
410

    my $msg_file = $destination_dir . '/msg00000.txt';
    unless (open OUT, '>', $msg_file) {
411
412
        Log::do_log('notice', 'Could Not open %s', $msg_file);
        return undef;
413
    }
414
415
416
    print OUT $msg_as_string;
    close OUT;

417
    # mhonarc require du change workdir so this proc must retore it
sikeda's avatar
sikeda committed
418
    my $pwd = Cwd::getcwd();
419

420
421
    ## generate HTML
    unless (chdir $destination_dir) {
422
423
424
        Log::do_log('err', 'Could not change working directory to %s',
            $destination_dir);
        return undef;
425
426
    }

427
    my $tag      = get_tag($that);
428
    my $exitcode = system(
sikeda's avatar
sikeda committed
429
430
431
        Conf::get_robot_conf($robot, 'mhonarc'),
        '-single',
        '-rcfile'     => $mhonarc_ressources,
432
433
        '-definevars' => sprintf(
            "listname='%s' hostname=%s yyyy='' mois='' tag=%s",
sikeda's avatar
sikeda committed
434
435
            $listname, $hostname, $tag
        ),
436
        '-outdir'        => $destination_dir,
437
438
439
440
        '-attachmentdir' => $destination_dir,
        '-attachmenturl' => $attachement_url,
        '-umask'         => $Conf::Conf{'umask'},
        '-stdout'        => "$destination_dir/msg00000.html",
sikeda's avatar
sikeda committed
441
442
        '--',
        $msg_file
443
    ) >> 8;
444

445
446
    # restore current wd
    chdir $pwd;
447

448
    if ($exitcode) {
449
450
451
452
453
        Log::do_log(
            'err',
            'Command %s failed with exit code %d',
            Conf::get_robot_conf($robot, 'mhonarc'), $exitcode
        );
454
455
    }

456
457
458
    return 1;
}

459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
=head2 sub get_tag(OBJECT $that)

Returns a tag derived from the listname.

=head3 Arguments 

=over 

=item * I<$that>, a List or Robot object.

=back 

=head3 Return 

=over 

=item * I<a character string>, corresponding to the 10 last characters of a 32 bytes string containing the MD5 digest of the concatenation of the following strings (in this order):

=over 4

=item - the cookie config parameter

=item - a slash: "/"

=item - name attribute of the I<$that> argument

=back 

=back

=cut 

sub get_tag {
    my $that = shift;

    my $name;
495
    if (ref $that eq 'Sympa::List') {
496
497
498
499
500
        $name = $that->{'name'};
    } elsif (!ref($that) and $that and $that ne '*') {
        $name = $that;
    } elsif (!ref($that)) {
        $name = '*';
501
502
    }

503
504
505
    return
        substr(Digest::MD5::md5_hex(join '/', $Conf::Conf{'cookie'}, $name),
        -10);
506
507
}

508
1;