Archive.pm 31.6 KB
Newer Older
1
2
3
4
# -*- indent-tabs-mode: nil; -*-
# vim:ft=perl:et:sw=4
# $Id$

5
# Sympa - SYsteme de Multi-Postage Automatique
6
7
8
9
#
# Copyright (c) 1997, 1998, 1999 Institut Pasteur & Christophe Wolfhugel
# Copyright (c) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
# 2006, 2007, 2008, 2009, 2010, 2011 Comite Reseau des Universites
10
# Copyright (c) 2011, 2012, 2013, 2014, 2015, 2016, 2017 GIP RENATER
11
12
# Copyright 2018, 2019, 2020, 2021 The Sympa Community. See the
# AUTHORS.md file at the top-level directory of this distribution and at
13
# <https://github.com/sympa-community/sympa.git>.
14
15
16
17
18
19
20
21
22
23
24
25
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
26
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
root's avatar
root committed
27

28
package Sympa::Archive;
root's avatar
root committed
29

30
use strict;
31
use warnings;
sikeda's avatar
sikeda committed
32
33
use Cwd qw();
use Encode qw();
34
35
use English qw(-no_match_vars);
use File::Path qw();
36
use IO::File;
37
use POSIX qw();
38

39
use Sympa;
40
use Conf;
41
42
use Sympa::Constants;
use Sympa::LockedFile;
43
use Sympa::Log;
44
use Sympa::Message;
45
use Sympa::Spool;
46
use Sympa::Tools::File;
47
use Sympa::Tools::Text;
root's avatar
root committed
48

49
50
my $log = Sympa::Log->instance;

51
sub new {
IKEDA Soji's avatar
IKEDA Soji committed
52
53
    my $class   = shift;
    my %options = @_;
54

IKEDA Soji's avatar
IKEDA Soji committed
55
    my $list = $options{context};
56
57
58
59
    die 'Bug in logic.  Ask developer' unless ref $list eq 'Sympa::List';

    my $self = bless {
        context           => $list,
60
        base_directory    => $list->get_archive_dir,
61
62
63
64
65
66
        arc_directory     => undef,
        directory         => undef,
        deleted_directory => undef,
        _metadatas        => undef,
    } => $class;

67
    $self->_create_spool(%options);
68
69
70

    return $self;
}
root's avatar
root committed
71

72
sub _create_spool {
73
74
    my $self    = shift;
    my %options = @_;
75
76
77

    my $umask = umask oct $Conf::Conf{'umask'};
    foreach my $directory ($Conf::Conf{'arc_path'}, $self->{base_directory}) {
78
79
80
        if (-d $directory) {
            next;
        } elsif ($options{create}) {
81
82
83
84
85
86
87
88
            $log->syslog('info', 'Creating spool %s', $directory);
            unless (
                mkdir($directory, 0755)
                and Sympa::Tools::File::set_file_rights(
                    file  => $directory,
                    user  => Sympa::Constants::USER(),
                    group => Sympa::Constants::GROUP()
                )
Luc Didry's avatar
Luc Didry committed
89
            ) {
90
91
92
93
94
95
                die sprintf 'Cannot create %s: %s', $directory, $ERRNO;
            }
        }
    }
    umask $umask;
}
root's avatar
root committed
96

97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
sub add_archive {
    my $self = shift;
    my $arc  = shift;

    return undef unless $arc;
    return undef unless $arc =~ /\A\d{4}-\d{2}\z/;

    my $umask = umask oct $Conf::Conf{'umask'};
    my $error;
    File::Path::make_path(
        $self->{base_directory} . '/' . $arc . '/arctxt',
        {   mode  => 0775,
            owner => Sympa::Constants::USER(),
            group => Sympa::Constants::GROUP(),
            error => \$error
        }
    );
    umask $umask;
root's avatar
root committed
115

116
117
118
119
120
    if (@$error) {
        return undef;
    }
    return 1;
}
121

122
123
124
sub purge_archive {
    my $self = shift;
    my $arc  = shift;
125

126
127
    return undef unless $arc;
    return undef unless $arc =~ /\A\d{4}-\d{2}\z/;
128

129
130
131
    my $error;
    File::Path::remove_tree($self->{base_directory} . '/' . $arc,
        {error => \$error});
132

133
134
    if (@$error) {
        return undef;
salaun's avatar
salaun committed
135
    }
136
137
    return 1;
}
138

139
140
141
142
143
144
145
146
147
148
149
150
151
sub select_archive {
    my $self    = shift;
    my $arc     = shift;
    my %options = @_;

    return undef unless $arc;
    return undef unless $arc =~ /\A\d{4}-\d{2}\z/;

    my $arc_directory     = $self->{base_directory} . '/' . $arc;
    my $directory         = $arc_directory . '/arctxt';
    my $deleted_directory = $arc_directory . '/deleted';

    my $dh;
152
    unless (opendir $dh, $directory) {
IKEDA Soji's avatar
tidyall    
IKEDA Soji committed
153
        if (-d $directory) {
154
            $log->syslog('err', 'Failed to open archive directory %s: %s',
IKEDA Soji's avatar
tidyall    
IKEDA Soji committed
155
                $directory, $ERRNO);
156
157
158
        }
        return;
    }
159
160
161
    closedir $dh;

    undef $self->{_metadatas};
sikeda's avatar
sikeda committed
162
    undef $self->{_html_metadatas};
163
164
165
166
167
168
    $self->{arc_directory}     = $arc_directory;
    $self->{directory}         = $directory;
    $self->{deleted_directory} = $deleted_directory;

    if ($options{info}) {
        return {
169
170
            size  => Sympa::Tools::File::get_dir_size($directory),
            mtime => Sympa::Tools::File::get_mtime($directory),
171
        };
sikeda's avatar
sikeda committed
172
173
174
175
176
177
178
179
    } elsif ($options{count}) {
        my $count;
        if (open my $fh, '<', $self->{arc_directory} . '/index') {
            $count = <$fh>;
            chomp $count;
            close $fh;
        }
        return {count => ($count || 0)};
180
181
182
    } else {
        return $arc;
    }
root's avatar
root committed
183
184
}

185
186
187
188
189
190
191
192
193
194
195
sub fetch {
    my $self    = shift;
    my %options = @_;

    undef $self->{_metadatas};    # Rewind cache.
    while (1) {
        my ($message, $handle) = $self->next;
        last unless $handle;      # No more messages.
        next unless $message;     # Malformed message.

        if ($options{message_id}) {
196
197
198
            my $message_id = Sympa::Tools::Text::canonic_message_id(
                $message->get_header('Message-Id'))
                || '';
199
200
201
202
203
204
205
206
207
            if ($message_id eq $options{message_id}) {
                undef $self->{_metadatas};    # Rewind cache.
                return ($message, $handle);
            }
        }
    }

    return;
}
sympa-authors's avatar
   
sympa-authors committed
208

sikeda's avatar
sikeda committed
209
210
211
212
213
214
215
216
sub html_fetch {
    $log->syslog('debug2', '(%s, %s => %s)', @_);
    my $self    = shift;
    my %options = @_;

    return undef unless $self->{arc_directory};
    return undef unless $options{file};

217
218
219
220
221
222
    my $html_file = $self->{arc_directory} . '/' . $options{file};
    my $handle = IO::File->new($html_file, '<');

    unless ($handle) {
        if (-f $html_file) {
            $log->syslog('err', 'Failed to open archive file %s: %s',
IKEDA Soji's avatar
tidyall    
IKEDA Soji committed
223
                $html_file, $ERRNO);
224
225
226
        }
        return undef;
    }
sikeda's avatar
sikeda committed
227
228
229
230
231
232
233

    my $metadata = {};    # May be empty.
    while (<$handle>) {
        last if /^\s*$/;    ## Metadata end with an emtpy line

        if (/^<!--(\S+): (.*) -->$/) {
            my ($key, $value) = ($1, $2);
234
            $value = Sympa::Tools::Text::decode_html($value);
sikeda's avatar
sikeda committed
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
            if ($key eq 'X-From-R13') {
                $metadata->{'X-From'} = $value;
                # MHonArc protection of email addresses.
                $metadata->{'X-From'} =~ tr/N-Z[@A-Mn-za-m/@A-Z[a-z/;
                # Remove the gecos.
                $metadata->{'X-From'} =~ s/^.*<(.*)>/$1/g;
            }
            $metadata->{$key} = $value;
        }
    }
    seek $handle, 0, 0;
    $metadata->{html_content} = do { local $RS; <$handle> };
    $metadata->{filename} = $options{file};

    return $metadata;
}

252
sub next {
253
254
    my $self    = shift;
    my %options = @_;
255

256
    return unless $self->{directory};
sympa-authors's avatar
   
sympa-authors committed
257

258
259
260
261
262
263
264
265
266
267
    unless ($self->{_metadatas}) {
        my $dh;
        unless (opendir $dh, $self->{directory}) {
            die sprintf 'Cannot open dir %s: %s', $self->{directory}, $ERRNO;
        }
        $self->{_metadatas} = [
            sort _cmp_numeric grep {
                        !/,lock/
                    and !m{(?:\A|/)(?:\.|T\.|BAD-)}
                    and -f ($self->{directory} . '/' . $_)
268
            } readdir $dh
269
270
        ];
        closedir $dh;
271
272
273
274

        # The "reverse" option specific to this class is set.
        $self->{_metadatas} = [reverse @{$self->{_metadatas}}]
            if $options{reverse};
275
    }
276
277
278
    unless (@{$self->{_metadatas}}) {
        undef $self->{_metadatas};
        return;
279
    }
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299

    while (my $marshalled = shift @{$self->{_metadatas}}) {
        my ($lock_fh, $metadata, $message);

        # Try locking message.  Those locked or removed by other process will
        # be skipped.
        $lock_fh =
            Sympa::LockedFile->new($self->{directory} . '/' . $marshalled,
            -1, '+<');
        next unless $lock_fh;

        $metadata =
            Sympa::Spool::unmarshal_metadata($self->{directory}, $marshalled,
            qr{\A(\d+)\z}, [qw(serial)]);

        if ($metadata) {
            my $msg_string = do { local $RS; <$lock_fh> };
            $message = Sympa::Message->new($msg_string, %$metadata);
        }

sikeda's avatar
sikeda committed
300
301
302
        # Metadata doesn't contain context; add it.
        $message->{context} = $self->{context} if $message;

303
304
        # Though message might not be deserialized, anyway return the result.
        return ($message, $lock_fh);
root's avatar
root committed
305
    }
306
    return;
root's avatar
root committed
307
308
}

sikeda's avatar
sikeda committed
309
310
311
312
313
314
315
316
317
sub html_next {
    my $self    = shift;
    my %options = @_;

    return undef unless $self->{arc_directory};

    unless ($self->{_html_metadatas}) {
        my $dh;
        unless (opendir $dh, $self->{arc_directory}) {
IKEDA Soji's avatar
tidyall    
IKEDA Soji committed
318
319
320
321
322
            $log->syslog(
                'err',
                'Cannot open dir %s: %s',
                $self->{arc_directory}, $ERRNO
            );
323
            return undef;
sikeda's avatar
sikeda committed
324
325
326
327
328
329
        }
        $self->{_html_metadatas} = [
            sort _cmp_numeric grep {
                        !/,lock/
                    and !m{(?:\A|/)(?:\.|T\.|BAD-)}
                    and -f ($self->{arc_directory} . '/' . $_)
330
            } readdir $dh
sikeda's avatar
sikeda committed
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
        ];
        closedir $dh;

        # The "reverse" option specific to this class is set.
        $self->{_html_metadatas} = [reverse @{$self->{_html_metadatas}}]
            if $options{reverse};
    }
    unless (@{$self->{_html_metadatas}}) {
        undef $self->{_html_metadatas};
        return undef;
    }

    while (my $marshalled = shift @{$self->{_html_metadatas}}) {
        return $self->html_fetch(file => $marshalled);
    }
    return undef;
}

349
sub _cmp_numeric {
sikeda's avatar
sikeda committed
350
351
352
353
    my $a_num = $1 if defined $a and $a =~ /(\d+)/;
    my $b_num = $1 if defined $b and $b =~ /(\d+)/;
    if (defined $a_num and defined $b_num) {
        return $a_num <=> $b_num || $a cmp $b;
354
355
356
357
358
359
360
361
362
363
364
    } else {
        return $a cmp $b;
    }
}

sub remove {
    $log->syslog('debug2', '(%s, %s)', @_);
    my $self   = shift;
    my $handle = shift;

    return undef unless $self->{arc_directory};
365

366
    my $list = $self->{context};
367

368
369
370
371
372
373
374
375
376
377
378
379
380
381
    # Move text message to deleted/ directory.
    unless (-d $self->{deleted_directory}) {
        my $umask = umask oct $Conf::Conf{'umask'};
        unless (mkdir $self->{deleted_directory}, 0777) {
            die sprintf 'Unable to create %s: %s',
                $self->{deleted_directory}, $ERRNO;
        }
        umask $umask;
    }
    unless (
        $handle->rename($self->{deleted_directory} . '/' . $handle->basename))
    {
        $log->syslog('info', 'Unable to rename message %s in archive %s: %s',
            $handle->basename, $self, Sympa::LockedFile->last_error);
382
        return undef;
sympa-authors's avatar
   
sympa-authors committed
383
    }
384

385
386
    # Remove directory if empty arctxt.
    rmdir $self->{directory};
387

388
389
    return 1;
}
sympa-authors's avatar
   
sympa-authors committed
390

391
# Old name: remove() in archived.pl.
sikeda's avatar
sikeda committed
392
sub html_remove {
393
394
    my $self  = shift;
    my $msgid = shift;
sympa-authors's avatar
   
sympa-authors committed
395

396
397
    return undef unless $self->{arc_directory};
    return undef unless $msgid and $msgid !~ /NO-ID-FOUND\.mhonarc\.org/;
sympa-authors's avatar
   
sympa-authors committed
398

399
    my $list = $self->{context};
sympa-authors's avatar
   
sympa-authors committed
400

401
402
403
404
405
406
    # Remove message from HTML archive.
    system(
        Conf::get_robot_conf($list->{'domain'}, 'mhonarc'),
        '-outdir' => $self->{arc_directory},
        '-rmm'    => $msgid
    );
sympa-authors's avatar
   
sympa-authors committed
407

408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
    return 1;
}

# Does the real job : stores the message given as an argument into
# the indicated directory.
# Old name: (part of) mail2arc() in archived.pl.
sub store {
    my $self    = shift;
    my $message = shift;

    my $list = $self->{context};
    my $arc = POSIX::strftime('%Y-%m', localtime $message->{date});
    my $newfile;

    unless ($self->select_archive($arc)) {
        $self->add_archive($arc);
        unless ($self->select_archive($arc)) {
            $log->syslog('err', 'Cannot create directory %s in archive %s',
                $arc, $self);
            return undef;
        }
sympa-authors's avatar
   
sympa-authors committed
429
430
    }

431
432
433
434
435
436
437
438
439
440
441
442
443
    # Copy the file in the arctxt.
    if (-f $self->{arc_directory} . "/index") {
        open my $fh, '<', $self->{arc_directory} . '/index'
            or die sprintf 'Can\'t read index of %s in %s: %s', $arc, $self,
            $ERRNO;
        $newfile = <$fh>;
        chomp $newfile;
        $newfile++;
        close $fh;
    } else {
        # recreate index file if needed and update it
        $newfile = _create_idx($self->{arc_directory}) + 1;
    }
sympa-authors's avatar
   
sympa-authors committed
444

445
446
447
448
449
450
    # Save arctxt dump of original message.
    open my $fh, '>', $self->{directory} . '/' . $newfile
        or die sprintf 'Can\'t open file %s/%s: %s', $self->{directory},
        $newfile, $ERRNO;
    print $fh $message->as_string;
    close $fh;
sympa-authors's avatar
   
sympa-authors committed
451

452
    _save_idx($self->{arc_directory} . '/index', $newfile);
sympa-authors's avatar
   
sympa-authors committed
453

454
455
456
457
    $log->syslog('notice', 'Message %s is stored into archive %s as <%s>',
        $message, $self, $newfile);
    return $newfile;
}
458

459
# Old name: (part of) mail2arc in archived.pl.
sikeda's avatar
sikeda committed
460
sub html_store {
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
    my $self    = shift;
    my $message = shift->dup;

    my $list = $self->{context};
    my $arc  = POSIX::strftime('%Y-%m', localtime $message->{date});
    my $yyyy = POSIX::strftime('%Y', localtime $message->{date});
    my $mm   = POSIX::strftime('%m', localtime $message->{date});

    unless ($self->select_archive($arc)) {
        $self->add_archive($arc);
        unless ($self->select_archive($arc)) {
            $log->syslog('err', 'Cannot create directory %s in archive %s',
                $arc, $self);
            return undef;
        }
    }
477

478
479
480
    # Prepare clean message content (HTML parts are cleaned)
    unless ($message->clean_html) {
        $log->syslog('err', "Could not clean message, ignoring message");
481
482
        return undef;
    }
483

484
    my $mhonarc_rc = Sympa::search_fullpath($list, 'mhonarc_rc.tt2');
485
486
487
488
489
490
491
492
493
494
495
496

    $log->syslog(
        'debug',
        'Calling %s for list %s',
        Conf::get_robot_conf($list->{'domain'}, 'mhonarc'), $list
    );

    # Call mhonarc on cleaned message source to make clean htlm view of
    # message.
    my @cmd = (
        Conf::get_robot_conf($list->{'domain'}, 'mhonarc'),
        '-add',
sikeda's avatar
sikeda committed
497
        '-addressmodifycode' => '1',    # w/a: Clear old cache in .mhonarc.db.
498
        '-rcfile'     => $mhonarc_rc,
sikeda's avatar
sikeda committed
499
500
        '-outdir'     => $self->{arc_directory},
        '-definevars' => sprintf(
501
            "listname='%s' hostname=%s yyyy=%s mois=%s yyyymm=%s-%s wdir=%s base=%s/arc with_tslice=1 with_powered_by=1",
502
503
504
505
506
507
508
            $list->{'name'},
            $list->{'domain'},
            $yyyy,
            $mm,
            $yyyy,
            $mm,
            Conf::get_robot_conf($list->{'domain'}, 'arc_path'),
IKEDA Soji's avatar
IKEDA Soji committed
509
            (Conf::get_robot_conf($list->{'domain'}, 'wwsympa_url') || ''),
510
511
512
513
514
515
516
517
518
        ),
        '-umask' => $Conf::Conf{'umask'}
    );

    $log->syslog('debug', 'System call: %s', join(' ', @cmd));

    my $pipeout;
    unless (open $pipeout, '|-', @cmd) {
        $log->syslog('err', 'Could not open pipe: %m');
519
        return undef;
sympa-authors's avatar
   
sympa-authors committed
520
    }
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
    print $pipeout $message->as_string;
    close $pipeout;
    my $status = $? >> 8;

    ## Remove lock if required
    if ($status == 75) {
        $log->syslog(
            'notice',
            'Removing lock directory %s',
            $self->{arc_directory} . '/.mhonarc.lck'
        );
        rmdir $self->{arc_directory} . '/.mhonarc.lck';

        my $pipeout;
        unless (open $pipeout, '|-', @cmd) {
            $log->syslog('err', 'Could not open pipe: %m');
            return undef;
538
        }
539
540
541
        print $pipeout $message->as_string;
        close $pipeout;
        $status = $? >> 8;
sympa-authors's avatar
   
sympa-authors committed
542
    }
543
544
545
546
547
548
549
550
551
    if ($status) {
        $log->syslog(
            'err',
            'Command %s failed with exit code %s',
            join(' ', @cmd), $status
        );
    }

    return 1;
sympa-authors's avatar
   
sympa-authors committed
552
553
}

554
555
# DEPRECATED.  No longer used.
#sub store_last;
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575

# DEPRECATED.  Use get_archives() and select_archive().
#sub list;

# Lists the files included in the archive, preformatted for printing
# Returns an array.
sub get_archives {
    $log->syslog('debug2', '(%s)', @_);
    my $self = shift;

    my $base_directory = $self->{base_directory};

    my $dh;
    unless ($base_directory and opendir $dh, $base_directory) {
        $log->syslog('err', 'Cannot open directory %s: %m', $base_directory);
        return;
    }
    my @arcs =
        grep {
        /\A\d\d\d\d\-\d\d\z/
576
            and -d $base_directory . '/'
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
            . $_
            . '/arctxt'
        }
        sort readdir $dh;
    closedir $dh;

    return @arcs;
}

# DEPRECATED.  Use select_archive() and next().
#sub scan_dir_archive;

# DEPRECATED.  Use select_archive() and fetch().
#sub search_msgid;

592
593
594
595
596
597
598
599
600
# Old name: Sympa::List::get_arc_size().
sub get_size {
    my $self = shift;
    my $dir  = shift;

    return 0 unless -d $self->{base_directory};
    return Sympa::Tools::File::get_dir_size($self->{base_directory});
}

601
# OBSOLETED.  No longer used.
root's avatar
root committed
602
sub exist {
603
    my ($name, $file) = @_;
root's avatar
root committed
604
    my $fn = "$name/$file";
605

root's avatar
root committed
606
607
608
609
    return $fn if (-r $fn && -f $fn);
    return undef;
}

sympa-authors's avatar
sympa-authors committed
610
# return path for latest message distributed in the list
611
612
# DEPRECATED.  No longer used.
#sub last_path;
613
614
615

## Load an archived message, returns the mhonarc metadata
## IN : file_path
sikeda's avatar
sikeda committed
616
617
# DEPRECATED.  Use html_fetch() or html_next().
#sub load_html_message;
618

619
# Old name: rebuild() in archived.pl.
sikeda's avatar
sikeda committed
620
sub html_rebuild {
621
622
623
624
625
626
627
628
629
630
631
632
633
634
    my $self = shift;
    my $arc  = shift;

    $arc =~ /^(\d{4})-(\d{2})$/;
    my $yyyy = $1;
    my $mm   = $2;

    return unless $self->select_archive($arc);

    my $list          = $self->{context};
    my $listname      = $list->{'name'};
    my $robot_id      = $list->{'domain'};
    my $arc_directory = $self->{arc_directory};

635
    my $mhonarc_rc = Sympa::search_fullpath($list, 'mhonarc_rc.tt2');
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661

    # Remove existing HTML files and .mhonarc.db.
    my $dh;
    opendir $dh, $arc_directory;
    unlink map { $arc_directory . '/' . $_ }
        grep {
                $_ ne 'arctxt'
            and $_ ne 'index'
            and $_ ne 'deleted'
            and !/\A\.+\z/
        } readdir $dh;
    closedir $dh;

    my $dir_to_rebuild = $self->{directory};
    my $arcs_dir       = $self->_clean_archive_directory($arc);
    if ($arcs_dir) {
        $dir_to_rebuild = $arcs_dir->{'dir_to_rebuild'};
    }

    # recreate index file if needed
    unless (-f $arc_directory . '/index') {
        _create_idx($arc_directory);
    }

    my @cmd = (
        Conf::get_robot_conf($robot_id, 'mhonarc'),
sikeda's avatar
sikeda committed
662
        '-addressmodifycode' => '1',    # w/a: Clear old cache in .mhonarc.db.
663
        '-rcfile'     => $mhonarc_rc,
sikeda's avatar
sikeda committed
664
665
        '-outdir'     => $arc_directory,
        '-definevars' => sprintf(
666
            "listname='%s' hostname=%s yyyy=%s mois=%s yyyymm=%s-%s wdir=%s base=%s/arc with_tslice=1 with_powered_by=1",
667
668
669
670
671
672
673
            $listname,
            $robot_id,
            $yyyy,
            $mm,
            $yyyy,
            $mm,
            Conf::get_robot_conf($robot_id, 'arc_path'),
IKEDA Soji's avatar
IKEDA Soji committed
674
            (Conf::get_robot_conf($robot_id, 'wwsympa_url') || ''),
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
        ),
        '-umask' => $Conf::Conf{'umask'},
        $dir_to_rebuild
    );
    my $exitcode = system(@cmd) >> 8;

    # Delete temporary directory containing files with escaped HTML.
    if ($arcs_dir and -d $arcs_dir->{'cleaned_dir'}) {
        my $error;
        File::Path::remove_tree($arcs_dir->{'cleaned_dir'},
            {error => \$error});
    }

    ## Remove lock if required
    if ($exitcode == 75) {
        $log->syslog(
            'notice',
            'Removing lock directory %s',
            $arc_directory . '/.mhonarc.lck'
        );
        rmdir $arc_directory . '/.mhonarc.lck';

        $exitcode = system(@cmd) >> 8;
    }
    if ($exitcode) {
        $log->syslog(
            'err',
            'Command %s failed with exit code %s',
            join(' ', @cmd), $exitcode
        );
    }
}

# Sets the value of $ENV{'M2H_ADDRESSMODIFYCODE'} and
# $ENV{'M2H_MODIFYBODYADDRESSES'}.
#* $tag a character string (containing the result of _get_tag($list))
711
# NO LONGER USED.
sikeda's avatar
sikeda committed
712
#sub _set_hidden_mode;
713
714

# Empties $ENV{'M2H_ADDRESSMODIFYCODE'}.
715
# NO LONGER USED.
sikeda's avatar
sikeda committed
716
#sub _unset_hidden_mode;
717
718
719
720
721
722
723
724
725
726

# Saves the archives index file
#* $index, a string corresponding to the file name to which save an index.
#* $lst, a character string
# Old name: save_idx() in archived.pl.
sub _save_idx {
    my ($index, $lst) = @_;

    return unless $lst;

727
728
729
730
731
732
    if (open my $fh, '>', $index) {
        print $fh "$lst\n";
        close $fh;
    } else {
        die sprintf 'Couldn\'t overwrite index %s: %s', $index, $ERRNO;
    }
733
734
735
736
737
738
739
740
741
}

# Create the 'index' file for one archive subdir
# Old name: create_idx() in archived.pl.
sub _create_idx {
    my $arc_dir = shift;    ## corresponds to the yyyy-mm directory

    my $arc_txt_dir = $arc_dir . '/arctxt';

742
743
744
745
746
747
748
749
    if (opendir my $dh, $arc_txt_dir) {
        my @files = sort { $a <=> $b; } grep {/^\d+$/} readdir $dh;
        closedir $dh;
        my $index = $files[$#files] || 0;
        _save_idx($arc_dir . '/index', $index);
        return $index;
    } else {
        $log->syslog('err', 'Failed to open directory %s: %m', $arc_txt_dir);
750
751
752
753
754
755
756
757
758
759
760
        return undef;
    }
}

# Old name: clean_archive_directory().
sub _clean_archive_directory {
    $log->syslog('debug3', '(%s, %s)', @_);
    my $self = shift;
    my $arc  = shift;

    return undef unless $self->select_archive($arc);
761
762

    my $answer;
763
764
765
    $answer->{'dir_to_rebuild'} = $self->{directory};
    $answer->{'cleaned_dir'}    = sprintf '%s/%s/%s/arctxt',
        $Conf::Conf{'tmpdir'}, $self->{context}->get_id, $arc;
766
    unless (
767
768
769
        my $number_of_copies = Sympa::Tools::File::copy_dir(
            $answer->{'dir_to_rebuild'},
            $answer->{'cleaned_dir'}
770
        )
Luc Didry's avatar
Luc Didry committed
771
    ) {
772
        $log->syslog(
773
            'err',
774
            'Unable to create a temporary directory where to store files for HTML escaping (%s). Cancelling',
775
776
777
778
            $number_of_copies
        );
        return undef;
    }
779
    if (opendir my $dh, $answer->{'cleaned_dir'}) {
780
        my $files_left_uncleaned = 0;
781
782
783
        foreach my $file (readdir $dh) {
            next if $file =~ /^\./;

784
            $files_left_uncleaned++
785
786
                unless _clean_archived_message(
                $self->{context}->{'domain'},    #FIXME
787
788
789
                $answer->{'cleaned_dir'} . '/' . $file,
                $answer->{'cleaned_dir'} . '/' . $file
                );
790
        }
791
        closedir $dh;
792
        if ($files_left_uncleaned) {
793
            $log->syslog('err',
794
                'HTML cleaning failed for %s files in the directory %s',
795
796
797
798
                $files_left_uncleaned, $answer->{'dir_to_rebuild'});
        }
        $answer->{'dir_to_rebuild'} = $answer->{'cleaned_dir'};
    } else {
799
        $log->syslog(
800
            'err',
801
802
            'Unable to open directory %s: %m',
            $answer->{'dir_to_rebuild'}
803
        );
804
        Sympa::Tools::File::del_dir($answer->{'cleaned_dir'});
805
806
807
808
        return undef;
    }
    return $answer;
}
809

810
811
# Old name: clean_archived_message().
sub _clean_archived_message {
812
    $log->syslog('debug2', '(%s, %s, %s)', @_);
813
814
    my $robot  = shift;
    my $input  = shift;
815
816
    my $output = shift;

817
    my $message = Sympa::Message->new_from_file($input, context => $robot);
818
    unless ($message) {
819
        $log->syslog('err', 'Unable to create a Message object with file %s',
820
821
822
            $input);
        return undef;
    }
823

824
    if ($message->clean_html) {
825
826
827
        if (open my $fh, '>', $output) {
            print $fh $message->as_string;
            close $fh;
828
829
            return 1;
        } else {
830
            $log->syslog(
831
                'err',
832
                'Unable to create a tmp file to write clean HTML to file %s',
833
834
                $output
            );
835
836
837
            return undef;
        }
    } else {
838
        $log->syslog('err', 'HTML cleaning in file %s failed', $output);
839
        return undef;
840
841
    }
}
root's avatar
root committed
842

sikeda's avatar
sikeda committed
843
844
845
846
# Old names archive::convert_single_msg_2_html(),
# Sympa::Archive::convert_single_message().
sub html_format {
    my $message = shift;
847
    my %opts    = @_;
848

sikeda's avatar
sikeda committed
849
    my $that = $message->{context};
850
851
852
    my $list;
    my $robot;
    my $listname;
853
    my $domain;
854
    if (ref $that eq 'Sympa::List') {
855
856
857
        $list     = $that;
        $robot    = $that->{'domain'};
        $listname = $that->{'name'};
858
        $domain   = $that->{'domain'};
859
860
861
862
    } elsif (!ref($that) and $that and $that ne '*') {
        $list     = '';
        $robot    = $that;
        $listname = '';
863
        $domain   = Conf::get_robot_conf($that, 'domain');
864
    } else {
865
        die 'bug in logic.  Ask developer';
866
867
    }

sikeda's avatar
sikeda committed
868
    my $msg_as_string = $message->as_string;
root's avatar
root committed
869

870
    my $destination_dir = $opts{'destination_dir'};
871
    my $attachment_url  = $opts{'attachment_url'};
872
873
874
875
    if (ref $attachment_url eq 'ARRAY') {
        $attachment_url = join '/',
            map { Sympa::Tools::Text::encode_uri($_) } @$attachment_url;
    }
876

877
878
879
    my $mhonarc_rc = Sympa::search_fullpath($that, 'mhonarc_rc.tt2');
    unless ($mhonarc_rc) {
        $log->syslog('notice', 'Cannot find any MHonArc resource file');
880
        return undef;
881
882
883
    }

    unless (-d $destination_dir) {
884
        unless (Sympa::Tools::File::mkdir_all($destination_dir, 0755)) {
885
            $log->syslog('err', 'Unable to create %s', $destination_dir);
886
887
            return undef;
        }
888
    }
889
890

    my $msg_file = $destination_dir . '/msg00000.txt';
891
892
893
894
    if (open my $fh, '>', $msg_file) {
        print $fh $msg_as_string;
        close $fh;
    } else {
sikeda's avatar
sikeda committed
895
        $log->syslog('notice', 'Can\'t open %s', $msg_file);
896
        return undef;
897
    }
898

899
    # mhonarc require du change workdir so this proc must retore it
sikeda's avatar
sikeda committed
900
    my $pwd = Cwd::getcwd();
901

902
903
    ## generate HTML
    unless (chdir $destination_dir) {
904
        $log->syslog('err', 'Could not change working directory to %s',
905
906
            $destination_dir);
        return undef;
907
908
    }

909
    my $exitcode = system(
sikeda's avatar
sikeda committed
910
911
        Conf::get_robot_conf($robot, 'mhonarc'),
        '-single',
912
        '-rcfile'     => $mhonarc_rc,
913
        '-definevars' => sprintf(
914
915
            "listname='%s' hostname=%s yyyy='' mois='' with_tslice='' with_powered_by=''",
            $listname, $domain
sikeda's avatar
sikeda committed
916
        ),
917
        '-outdir'        => $destination_dir,
918
        '-attachmentdir' => $destination_dir,
919
920
921
        '-attachmenturl' => sprintf('<%% path_cgi %%>/%s', $attachment_url),
        '-umask'         => $Conf::Conf{'umask'},
        '-stdout'        => "$destination_dir/msg00000.html",
sikeda's avatar
sikeda committed
922
923
        '--',
        $msg_file
924
    ) >> 8;
925

926
927
    # restore current wd
    chdir $pwd;
928

929
    if ($exitcode) {
930
        $log->syslog(
931
932
933
934
            'err',
            'Command %s failed with exit code %d',
            Conf::get_robot_conf($robot, 'mhonarc'), $exitcode
        );
935
936
    }

937
938
939
    return 1;
}

940
# Old name: Sympa::Archive::get_tag(), get_tag() in archived.pl.
941
942
# No longer used.
#sub _get_tag;
943

944
945
sub get_id {
    my $self = shift;
946

947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
    my $context = $self->{context};
    unless (ref $context eq 'Sympa::List') {
        return '';
    } elsif ($self->{arc_directory}) {
        return sprintf '%s/%s', $context->get_id,
            [split '/', $self->{arc_directory}]->[-1];
    } else {
        return $context->get_id;
    }
}

1;
__END__

=encoding utf-8

=head1 NAME

Sympa::Archive - Archives of Sympa

=head1 SYNOPSIS

  use Sympa::Archive;
970
  $archive = Sympa::Archive->new(context => $list);
971
972
973
974

  @arcs = $archive->get_archives;

  $archive->store($message);
sikeda's avatar
sikeda committed
975
  $archive->html_store($message);
976
977
978
979
980
981

  $archive->select_archive('2015-04');
  ($message, $handle) = $archive->next;

  $archive->select_archive('2015-04');
  ($message, $handle) = $archive->fetch(message_id => $message_id);
sikeda's avatar
sikeda committed
982
  $archive->html_remove($message_id);
983
984
  $archive->remove($handle);

sikeda's avatar
sikeda committed
985
  $archive->html_rebuild('2015-04');
986
987
988
989
990
991

=head1 DESCRIPTION

L<Sympa::Archive> implements the interface to handle archives.

=head2 Methods and functions
992

993
=over
994

995
=item new ( context =E<gt> $list, [ create =E<gt> 1 ] )
996

997
998
I<Constructor>.
Creates new instance of L<Sympa::Archive>.
999

sikeda's avatar
sikeda committed
1000
1001
1002
1003
Parameter:

=over

1004
=item context =E<gt> $list
sikeda's avatar
sikeda committed
1005
1006
1007

Context of object, a L<Sympa::List> instance.

1008
1009
1010
1011
1012
1013
=item create =E<gt> 1

If necessary, creates directory structure of archive.
Dies if creation fails.
This parameter was introduced on Sympa 6.2.47b.

sikeda's avatar
sikeda committed
1014
1015
=back

1016
=item add_archive ( $arc )
1017

1018
1019
1020
I<Instance method>.
Adds archive directory named $arc.
Currently, archive directory must have the form C<YYYY-MM>.
1021

1022
=item purge_archive ( $arc )
1023

1024
1025
1026
I<Instance method>.
Removes archive directory and its content entirely.
removed content can not be recovered.
1027

1028
=item select_archive ( $arc, [ info =E<gt> 1 ] )
1029

1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
I<Instance method>.
Selects an archive directory.
It will be referred by consequent operations.

=item fetch ( message_id =E<gt> $message_id )

I<Instance method>.
Gets a message from archive.
select_archive() must be called in advance.

1040
Message will be locked to prevent multiple processing of a single message.
1041
1042
1043
1044
1045
1046
1047

Parameter:

=over

=item message_id =E<gt> $message_id

1048
Message ID of the message to be fetched.
1049
1050
1051

=back

1052
Returns:
1053

1054
1055
Two-elements list of L<Sympa::Message> instance and filehandle locking
a message.
1056

sikeda's avatar
sikeda committed
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
=item html_fetch ( file =E<gt> $filename )

I<Instance method>.
Gets a metadata of formatted message from HTML archive.
select_archive() must be called in advance.

Parameter:

=over

=item file =E<gt> $filename

1069
File name of the message to be fetched.
sikeda's avatar
sikeda committed
1070
1071
1072
1073
1074
1075
1076
1077

=back

Returns:

Hashref.
Note that message won't be locked.

1078
1079
1080
1081
1082
1083
=item get_size ( )

I<Instance method>.
Gets total size of messages in archives.
This method was introduced on Sympa 6.2.17.

1084
=item next ( [ reverse =E<gt> 1 ] )
1085

1086
1087
1088
I<Instance method>.
Gets next message in archive.
select_archive() must be called in advance.
1089

1090
Message will be locked to prevent multiple processing of a single message.
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100

Parameters:

None.

Returns:

Two-elements list of L<Sympa::Message> instance and filehandle locking
a message.

sikeda's avatar
sikeda committed
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
=item html_next ( [ reverse =E<gt> 1 ] )

I<Instance method>.
Gets next metadata of formatted message in archive.
select_archive() must be called in advance.

Parameters:

None.

Returns:

Hashref.
Note that message will not be locked.

1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
=item remove ( $handle )

I<Instance method>.
Removes a message from archive.

Parameter:

=over

=item $handle

Filehandle, L<Sympa::LockedFile> instance, locking message.
It is returned by L</fetch>() or L</next>().

=back

Returns:

True value if message could be removed.
Otherwise false value.

sikeda's avatar
sikeda committed
1137
=item html_remove ( $message_id )
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170

I<Instance method>.
TBD.

=item store ( $message )

I<Instance method>.
Stores the message into archive.

Parameters:

=over

=item $message

A L<Sympa::Message> instance to be stored.
Following attributes and metadata are referred:

=over

=item {date}

Unix time when the message would be delivered.

=back

=back

Returns:

If storing succeeded, marshalled metadata (file name) of the message.
Otherwise C<undef>.

sikeda's avatar
sikeda committed
1171
=item html_store ( $message )
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181

I<Instance method>.
TBD.

=item get_archives ( )

I<Instance method>.
Gets a list of archive directories this archive contains.
Items of returned value may be fed to select_archive() and so on.

sikeda's avatar
sikeda committed
1182
=item html_rebuild ( $arc )
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202

I<Instance method>.
Rebuilds archives for the list the name of which is given in the argument
$arc.

Parameters:

=over

=item $arc

A character string containing the name of archive directory in the list
which we want to rebuild.

=back

Returns:

I<undef> if something goes wrong.

sikeda's avatar
sikeda committed
1203
1204
=item html_format ( $message,
destination_dir =E<gt> $destination_dir,
1205
attachment_url =E<gt> $attachment_url )
1206

sikeda's avatar
sikeda committed
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
I<Function>.
Converts a message to HTML.

Parameters:

=over

=item $message

Message to be formatted.
L<Sympa::Message> instance.

=item $destination_dir

The directory result is stored in.

=item $attachment_url

1225
1226
1227
1228
Base URL used to link attachments.

Note:
On 6.2.13 and earlier, this option was named "C<attachB<e>ment_url>".
sikeda's avatar
sikeda committed
1229

1230
1231
1232
1233
1234
Note:
On 6.2.17 and later, this option may take an arrayref value.
In such case items will be percent-encoded and conjuncted.
Otherwise if a string is given, it will not be encoded.

sikeda's avatar
sikeda committed
1235
=back
1236
1237
1238
1239
1240
1241
1242