Archive.pm 13.6 KB
Newer Older
1
2
3
4
# -*- indent-tabs-mode: nil; -*-
# vim:ft=perl:et:sw=4
# $Id$

5
# Sympa - SYsteme de Multi-Postage Automatique
6
7
8
9
#
# Copyright (c) 1997, 1998, 1999 Institut Pasteur & Christophe Wolfhugel
# Copyright (c) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
# 2006, 2007, 2008, 2009, 2010, 2011 Comite Reseau des Universites
10
# Copyright (c) 2011, 2012, 2013, 2014, 2015 GIP RENATER
11
12
13
14
15
16
17
18
19
20
21
22
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
23
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
root's avatar
root committed
24

25
package Sympa::Archive;
root's avatar
root committed
26

27
use strict;
28
use warnings;
sikeda's avatar
sikeda committed
29
use Cwd qw();
30
use Digest::MD5 qw();
sikeda's avatar
sikeda committed
31
32
use Encode qw();
use HTML::Entities qw();
33

34
use Sympa;
35
use Conf;
36
use Sympa::Log;
37
use Sympa::Message;
38
use Sympa::Tools::File;
root's avatar
root committed
39

40
41
my $log = Sympa::Log->instance;

42
my $serial_number = 0;    # incremented on each archived mail
root's avatar
root committed
43
44
45
46
47
48

## RCS identification.

## Does the real job : stores the message given as an argument into
## the indicated directory.

sympa-authors's avatar
sympa-authors committed
49
sub store_last {
50
51
    my ($list, $msg) = @_;

52
    $log->syslog('debug2', '');
53

sympa-authors's avatar
sympa-authors committed
54
    return unless $list->is_archived();
55
56
    my $dir = $list->{'dir'} . '/archives';

root's avatar
root committed
57
    ## Create the archive directory if needed
58
    mkdir($dir, "0775") if !(-d $dir);
root's avatar
root committed
59
    chmod 0774, $dir;
60

root's avatar
root committed
61
62
    ## erase the last  message and replace it by the current one
    open(OUT, "> $dir/last_message");
63
64
65
66
    if (ref($msg)) {
        $msg->print(\*OUT);
    } else {
        print OUT $msg;
salaun's avatar
salaun committed
67
    }
root's avatar
root committed
68
    close(OUT);
69

root's avatar
root committed
70
71
72
73
74
75
}

## Lists the files included in the archive, preformatted for printing
## Returns an array.
sub list {
    my $name = shift;
sympa-authors's avatar
   
sympa-authors committed
76

77
    $log->syslog('debug', '(%s)', $name);
78

79
    my (@l, $i);
sympa-authors's avatar
   
sympa-authors committed
80

root's avatar
root committed
81
    unless (-d "$name") {
82
        $log->syslog('err', '(%s) Failed, no directory %s', $name, $name);
83
#      @l = ($msg::no_archives_available);
84
85
        return @l;
    }
root's avatar
root committed
86
    unless (opendir(DIR, "$name")) {
87
        $log->syslog('err', '(%s) Failed, cannot open directory %s',
88
            $name, $name);
89
#	@l = ($msg::no_archives_available);
90
91
92
93
94
95
96
97
        return @l;
    }
    foreach $i (sort readdir(DIR)) {
        next if ($i =~ /^\./o);
        next unless ($i =~ /^\d\d\d\d\-\d\d$/);
        my (@s) = stat("$name/$i");
        my $a = localtime($s[9]);
        push(@l, sprintf("%-40s %7d   %s\n", $i, $s[7], $a));
root's avatar
root committed
98
99
100
101
    }
    return @l;
}

sympa-authors's avatar
   
sympa-authors committed
102
sub scan_dir_archive {
103
    $log->syslog('debug3', '(%s, %s)', @_);
104
    my ($list, $month) = @_;
105

sikeda's avatar
sikeda committed
106
107
    my $dir =
        Conf::get_robot_conf($list->{'domain'}, 'arc_path') . '/'
108
        . $list->get_list_id();
109
110

    unless (opendir(DIR, "$dir/$month/arctxt")) {
111
        $log->syslog('info', 'Unable to open dir %s/%s/arctxt', $dir, $month);
112
        return undef;
sympa-authors's avatar
   
sympa-authors committed
113
    }
114

sympa-authors's avatar
   
sympa-authors committed
115
    my $all_msg = [];
116
    my $i       = 0;
sympa-authors's avatar
   
sympa-authors committed
117
    foreach my $file (sort readdir(DIR)) {
118
        next unless ($file =~ /^\d+$/);
119
        $log->syslog('debug', 'Start parsing message %s/%s/arctxt/%s',
120
            $dir, $month, $file);
121

sikeda's avatar
sikeda committed
122
        my $message =
sikeda's avatar
sikeda committed
123
            Sympa::Message->new_from_file("$dir/$month/arctxt/$file",
124
            context => $list);
125
        unless ($message) {
126
127
            $log->syslog('err',
                'Unable to create Message object from file %s', $file);
128
129
            return undef;
        }
130
131
        # Decrypt message if possible
        $message->smime_decrypt;
sympa-authors's avatar
   
sympa-authors committed
132

133
        $log->syslog('debug', 'MAIL object: %s', $message);
sympa-authors's avatar
   
sympa-authors committed
134

135
136
137
        $i++;
        my $msg = {};
        $msg->{'id'} = $i;
sympa-authors's avatar
   
sympa-authors committed
138

139
        $msg->{'subject'} = $message->{'decoded_subject'};
sikeda's avatar
sikeda committed
140
141
        $msg->{'from'}    = $message->get_decoded_header('From');
        $msg->{'date'}    = $message->get_decoded_header('Date');
sympa-authors's avatar
   
sympa-authors committed
142

143
        $msg->{'full_msg'} = $message->as_string;
sympa-authors's avatar
   
sympa-authors committed
144

145
        $log->syslog('debug', 'Adding message %s in archive to send',
146
            $msg->{'subject'});
sympa-authors's avatar
   
sympa-authors committed
147

148
        push @{$all_msg}, $msg;
sympa-authors's avatar
   
sympa-authors committed
149
    }
150
    closedir DIR;
sympa-authors's avatar
   
sympa-authors committed
151
152
153
154
155

    return $all_msg;
}

#####################################################
156
#  search_msgid
sympa-authors's avatar
   
sympa-authors committed
157
####################################################
158
#
sympa-authors's avatar
   
sympa-authors committed
159
# find a message in archive specified by arcpath and msgid
160
#
sympa-authors's avatar
   
sympa-authors committed
161
162
163
164
# IN : arcpath and msgid
#
# OUT : undef | #message in arctxt
#
165
####################################################
sympa-authors's avatar
   
sympa-authors committed
166
167
168

sub search_msgid {

169
170
    my ($dir, $msgid) = @_;

171
    $log->syslog('info', '(%s, %s)', $dir, $msgid);
172

sympa-authors's avatar
   
sympa-authors committed
173
    if ($msgid =~ /NO-ID-FOUND\.mhonarc\.org/) {
174
        $log->syslog('err', 'No message id found');
175
176
        return undef;
    }
sympa-authors's avatar
   
sympa-authors committed
177
    unless ($dir =~ /\d\d\d\d\-\d\d\/arctxt/) {
178
        $log->syslog('info', 'Dir %s look unproper', $dir);
179
        return undef;
sympa-authors's avatar
   
sympa-authors committed
180
    }
181
    unless (opendir(ARC, "$dir")) {
182
        $log->syslog('info',
183
184
185
            "archive::scan_dir_archive($dir, $msgid): unable to open dir $dir"
        );
        return undef;
sympa-authors's avatar
   
sympa-authors committed
186
    }
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
    chomp $msgid;

    foreach my $file (grep (!/\./, readdir ARC)) {
        next unless (open MAIL, "$dir/$file");
        while (<MAIL>) {
            last if /^$/;    #stop parse after end of headers
            if (/^Message-id:\s?<?([^>\s]+)>?\s?/i) {
                my $id = $1;
                if ($id eq $msgid) {
                    close MAIL;
                    closedir ARC;
                    return $file;
                }
            }
        }
        close MAIL;
sympa-authors's avatar
   
sympa-authors committed
203
204
205
206
207
    }
    closedir ARC;
    return undef;
}

root's avatar
root committed
208
sub exist {
209
    my ($name, $file) = @_;
root's avatar
root committed
210
    my $fn = "$name/$file";
211

root's avatar
root committed
212
213
214
215
    return $fn if (-r $fn && -f $fn);
    return undef;
}

sympa-authors's avatar
sympa-authors committed
216
217
# return path for latest message distributed in the list
sub last_path {
218
    $log->syslog('debug', '(%s)', @_);
sympa-authors's avatar
sympa-authors committed
219
220
    my $list = shift;

221
    return undef unless $list->is_archived();
222

223
    my $file = $list->{'dir'} . '/archives/last_message';
224
    return $file if -f $file;
sympa-authors's avatar
sympa-authors committed
225
226
227

    return undef;
}
228
229
230
231
232
233

## Load an archived message, returns the mhonarc metadata
## IN : file_path
sub load_html_message {
    my %parameters = @_;

234
    $log->syslog('debug2', $parameters{'file_path'});
235
236
237
    my %metadata;

    unless (open ARC, $parameters{'file_path'}) {
238
        $log->syslog(
239
            'err',
240
            'Failed to load message "%s": %m',
241
242
243
            $parameters{'file_path'}
        );
        return undef;
244
245
246
    }

    while (<ARC>) {
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
        last if /^\s*$/;    ## Metadata end with an emtpy line

        if (/^<!--(\S+): (.*) -->$/) {
            my ($key, $value) = ($1, $2);
            $value =
                Encode::encode_utf8(
                HTML::Entities::decode_entities(Encode::decode_utf8($value)));
            if ($key eq 'X-From-R13') {
                $metadata{'X-From'} = $value;
                ## Mhonarc protection of email addresses
                $metadata{'X-From'} =~ tr/N-Z[@A-Mn-za-m/@A-Z[a-z/;
                $metadata{'X-From'} =~ s/^.*<(.*)>/$1/g;   ## Remove the gecos
            }
            $metadata{$key} = $value;
        }
262
263
264
    }

    close ARC;
265

266
267
268
    return \%metadata;
}

269
sub clean_archive_directory {
270
    $log->syslog('debug2', '(%s, %s)', @_);
271
    my $robot          = shift;
272
273
    my $dir_to_rebuild = shift;

sikeda's avatar
sikeda committed
274
    my $arc_root = Conf::get_robot_conf($robot, 'arc_path');
275
276
277
278
    my $answer;
    $answer->{'dir_to_rebuild'} = $arc_root . '/' . $dir_to_rebuild;
    $answer->{'cleaned_dir'} = $Conf::Conf{'tmpdir'} . '/' . $dir_to_rebuild;
    unless (
279
280
281
        my $number_of_copies = Sympa::Tools::File::copy_dir(
            $answer->{'dir_to_rebuild'},
            $answer->{'cleaned_dir'}
282
283
        )
        ) {
284
        $log->syslog(
285
            'err',
286
            'Unable to create a temporary directory where to store files for HTML escaping (%s). Cancelling',
287
288
289
290
291
292
293
294
295
            $number_of_copies
        );
        return undef;
    }
    if (opendir ARCDIR, $answer->{'cleaned_dir'}) {
        my $files_left_uncleaned = 0;
        foreach my $file (readdir(ARCDIR)) {
            next if ($file =~ /^\./);
            $files_left_uncleaned++
296
297
298
299
300
                unless clean_archived_message(
                $robot, undef,
                $answer->{'cleaned_dir'} . '/' . $file,
                $answer->{'cleaned_dir'} . '/' . $file
                );
301
302
303
        }
        closedir DIR;
        if ($files_left_uncleaned) {
304
            $log->syslog('err',
305
                'HTML cleaning failed for %s files in the directory %s',
306
307
308
309
                $files_left_uncleaned, $answer->{'dir_to_rebuild'});
        }
        $answer->{'dir_to_rebuild'} = $answer->{'cleaned_dir'};
    } else {
310
        $log->syslog(
311
            'err',
312
313
            'Unable to open directory %s: %m',
            $answer->{'dir_to_rebuild'}
314
        );
315
        Sympa::Tools::File::del_dir($answer->{'cleaned_dir'});
316
317
318
319
        return undef;
    }
    return $answer;
}
320

321
sub clean_archived_message {
322
    $log->syslog('debug2', '(%s, %s, %s)', @_);
323
    my $robot  = shift;
sikeda's avatar
sikeda committed
324
    my $list   = shift;
325
    my $input  = shift;
326
327
    my $output = shift;

328
329
    my $message =
        Sympa::Message->new_from_file($input, context => ($list || $robot),);
330
    unless ($message) {
331
        $log->syslog('err', 'Unable to create a Message object with file %s',
332
333
334
            $input);
        return undef;
    }
335

336
    if ($message->clean_html) {
337
338
        if (open TMP, '>', $output) {
            print TMP $message->as_string;
339
340
341
            close TMP;
            return 1;
        } else {
342
            $log->syslog(
343
                'err',
344
                'Unable to create a tmp file to write clean HTML to file %s',
345
346
                $output
            );
347
348
349
            return undef;
        }
    } else {
350
        $log->syslog('err', 'HTML cleaning in file %s failed', $output);
351
        return undef;
352
353
    }
}
root's avatar
root committed
354

355
###########################
356
# convert a message to HTML.
357
358
#    result is stored in $destination_dir
#    attachement_url is used to link attachement
359
#
360
361
362
# NOTE: This might be moved to Site package as a mutative method.
# NOTE: convert_single_msg_2_html() was deprecated.
sub convert_single_message {
363
364
365
    my $that    = shift;    # List or Robot object
    my $message = shift;    # Message object or hashref
    my %opts    = @_;
366
367
368
369
370

    my $list;
    my $robot;
    my $listname;
    my $hostname;
371
    if (ref $that eq 'Sympa::List') {
372
373
374
375
376
377
378
379
380
        $list     = $that;
        $robot    = $that->{'domain'};
        $listname = $that->{'name'};
        $hostname = $that->{'admin'}{'host'};
    } elsif (!ref($that) and $that and $that ne '*') {
        $list     = '';
        $robot    = $that;
        $listname = '';
        $hostname = Conf::get_robot_conf($that, 'host');
381
    } else {
382
        die 'bug in logic.  Ask developer';
383
384
385
    }

    my $msg_as_string;
386
    if (ref $message eq 'Sympa::Message') {
387
        $msg_as_string = $message->as_string;
388
    } elsif (ref $message eq 'HASH') {
389
        $msg_as_string = $message->{'messageasstring'};
390
    } else {
391
        die 'bug in logic.  Ask developer';
392
    }
root's avatar
root committed
393

394
395
396
    my $destination_dir = $opts{'destination_dir'};
    my $attachement_url = $opts{'attachement_url'};

397
    my $mhonarc_ressources =
398
        Sympa::search_fullpath($that, 'mhonarc-ressources.tt2');
399
    unless ($mhonarc_ressources) {
400
        $log->syslog('notice', 'Cannot find any MhOnArc ressource file');
401
        return undef;
402
403
404
    }

    unless (-d $destination_dir) {
405
        unless (Sympa::Tools::File::mkdir_all($destination_dir, 0755)) {
406
            $log->syslog('err', 'Unable to create %s', $destination_dir);
407
408
            return undef;
        }
409
    }
410
411
412

    my $msg_file = $destination_dir . '/msg00000.txt';
    unless (open OUT, '>', $msg_file) {
413
        $log->syslog('notice', 'Could Not open %s', $msg_file);
414
        return undef;
415
    }
416
417
418
    print OUT $msg_as_string;
    close OUT;

419
    # mhonarc require du change workdir so this proc must retore it
sikeda's avatar
sikeda committed
420
    my $pwd = Cwd::getcwd();
421

422
423
    ## generate HTML
    unless (chdir $destination_dir) {
424
        $log->syslog('err', 'Could not change working directory to %s',
425
426
            $destination_dir);
        return undef;
427
428
    }

429
    my $tag      = get_tag($that);
430
    my $exitcode = system(
sikeda's avatar
sikeda committed
431
432
433
        Conf::get_robot_conf($robot, 'mhonarc'),
        '-single',
        '-rcfile'     => $mhonarc_ressources,
434
435
        '-definevars' => sprintf(
            "listname='%s' hostname=%s yyyy='' mois='' tag=%s",
sikeda's avatar
sikeda committed
436
437
            $listname, $hostname, $tag
        ),
438
        '-outdir'        => $destination_dir,
439
440
441
442
        '-attachmentdir' => $destination_dir,
        '-attachmenturl' => $attachement_url,
        '-umask'         => $Conf::Conf{'umask'},
        '-stdout'        => "$destination_dir/msg00000.html",
sikeda's avatar
sikeda committed
443
444
        '--',
        $msg_file
445
    ) >> 8;
446

447
448
    # restore current wd
    chdir $pwd;
449

450
    if ($exitcode) {
451
        $log->syslog(
452
453
454
455
            'err',
            'Command %s failed with exit code %d',
            Conf::get_robot_conf($robot, 'mhonarc'), $exitcode
        );
456
457
    }

458
459
460
    return 1;
}

461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
=head2 sub get_tag(OBJECT $that)

Returns a tag derived from the listname.

=head3 Arguments 

=over 

=item * I<$that>, a List or Robot object.

=back 

=head3 Return 

=over 

=item * I<a character string>, corresponding to the 10 last characters of a 32 bytes string containing the MD5 digest of the concatenation of the following strings (in this order):

=over 4

=item - the cookie config parameter

=item - a slash: "/"

=item - name attribute of the I<$that> argument

=back 

=back

=cut 

sub get_tag {
    my $that = shift;

    my $name;
497
    if (ref $that eq 'Sympa::List') {
498
499
500
501
502
        $name = $that->{'name'};
    } elsif (!ref($that) and $that and $that ne '*') {
        $name = $that;
    } elsif (!ref($that)) {
        $name = '*';
503
504
    }

505
506
507
    my $cookie = $Conf::Conf{'cookie'};
    $cookie = '' unless defined $cookie;
    return substr(Digest::MD5::md5_hex(join '/', $cookie, $name), -10);
508
509
}

510
1;