Commit 4df890be authored by olivier.salaun's avatar olivier.salaun
Browse files

***** Changes: web pages are now utf-8 encoded, as well as mhonarc HTML

***** archives that can now mix different character sets within a single
***** web archive. Sympa now uses perl's I/O layer to encode/decode characters.
***** The web_recode_to parameter is now obsolete ; the new
***** filesystem_encoding parameter allows you to define how the configuration
***** files are encoded on disk. A new mhonarc-ressources.tt2 file is provided
***** and should replace any custom one.


git-svn-id: https://subversion.renater.fr/sympa/trunk@3836 05aa8bb8-cd2b-0410-b1d7-8918dfa770ce
parent 14ece504
......@@ -2439,18 +2439,9 @@ The maximum size of the uploaded picture file (bytes)
This is the default language for \Sympa. The message catalog (.po, compiled as a .mo file) located
in the corresponding \cfkeyword {locale} directory will be used.
\subsection {\cfkeyword {web\_recode\_to}}
\subsection {\cfkeyword {web\_recode\_to}} (OBSOLETE)
If you set this parameter to a charset then web pages will be recoded to this specified charset. This is useful to have web pages
in UTF-8, allowing multi-lingual contents. You should check that customized web templates, topics.conf, list config files, info files are
all using the same charset.
Example :
\begin {quote}
\begin{verbatim}
web_recode_to utf-8
\end{verbatim}
\end {quote}
All web pages are now encoded in utf-8.
Note : if you recode web pages to utf-8, you should also add the following tag to your \file {mhonarc-ressources.tt2} file :
\begin {quote}
......@@ -2461,6 +2452,17 @@ utf-8; MHonArc::UTF8::to_utf8; MHonArc/UTF8.pm
\end{verbatim}
\end {quote}
\subsection {\cfkeyword {filesystem\_encoding}}
\label{filesystem-encoding}
\index{filesystem-encoding}
\default {utf-8}
\example {filesystem\_encoding iso-8859-1}
Sympa (and Perl) use utf-8 as their internal encoding and
also for the encoding of web pages. Because you might use a
different character encoding on your filesystem, you need to
declare it, so that Sympa is able to properly decode strings.
\section {Bounce related}
......@@ -3597,6 +3599,43 @@ Note that you must start the \file {sympa.pl} process with the web interface ; i
mail messages including password reminders.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Sympa Internationalization
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\cleardoublepage
\chapter {Sympa Internationalization}
\label{i18n}
\index{i18n}
\section {Translating Sympa GUI}
\section {Defining language-specific templates}
\section {Handling of encodings}
Until version 5.3, Sympa web pages were encoded in each language's
encoding (iso-8859-1 for French, utf-8 for Japanese,...) whereas every
web page is now encoded in utf-8. Thanks to
the \perlmodule {Encode} Perl module, Sympa can now juggle with the
filesystem encoding, each message catalog's encoding and its web
encoding (utf-8).
If your operating system uses a character encoding different from
utf-8, then you should declare it using the \cfkeyword
{filesystem\_encoding} sympa.conf parameter (see
\ref {filesystem-encoding}, page~\pageref {filesystem-encoding}). It is required to do so
because Sympa has no way to find out what encoding is used for its
configuration files. Once this encoding is known, every template or
configuration parameter can be read properly for the web and also
saved properly when edited from the web interface.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Sympa RSS Channel
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
......
......@@ -185,16 +185,24 @@ Content-Transfer-Encoding: [%|loc%]_encoding_[%END%]
[%|loc%]Check Sympa log files for more precise information.[%END%]
[% ELSIF type == 'load_admin_file_error' -%]
Subject: [% FILTER qencode %][%|loc(param0)%]list %1 in status error_config[%END%][%END%]
[% ELSIF type == 'param_constraint_conf_error' -%]
Subject: [% FILTER qencode %][%|loc%]errors in param_constraint.conf file[%END%][%END%]
MIME-Version: 1.0
Content-Type: text/plain; charset=[%|loc%]_charset_[%END%];
Content-Transfer-Encoding: [%|loc%]_encoding_[%END%]
[%|loc%]Sympa could not load the list config file.[%END%]
[%|loc%]The list status has been set to error_config.[%END%]
[%|loc(param0)%]Error(s) found in the file %1.[%END%]
[%|loc%]Check Sympa log files for more precise information.[%END%]
[% ELSIF type == 'file_removed' -%]
Subject: [% FILTER qencode %][%|loc%]File removed[%END%][%END%]
MIME-Version: 1.0
Content-Type: text/plain; charset=[%|loc%]_charset_[%END%];
Content-Transfer-Encoding: [%|loc%]_encoding_[%END%]
[%|loc(param0)%]The following file has been removed : %1[%END%]
[%|loc(param1)%]A backup of the file has been made : %1[%END%]
[% ELSIF type == 'no_list_family' -%]
Subject: [% FILTER qencode %][%|loc(param0)%]list %1 in status error_config[%END%][%END%]
......
......@@ -58,13 +58,14 @@ my @valid_options = qw(
css_url css_path static_content_path static_content_url pictures_max_size pictures_feature
ldap_export_name ldap_export_host ldap_export_suffix ldap_export_password
ldap_export_dnmanager ldap_export_connection_timeout update_db_field_types urlize_min_size
list_check_smtp list_check_suffixes spam_protection web_archive_spam_protection soap_url
list_check_smtp list_check_suffixes filesystem_encoding spam_protection web_archive_spam_protection soap_url
web_recode_to use_blacklist
);
my %old_options = ('trusted_ca_options' => 'capath,cafile',
'msgcat' => 'localedir',
'queueexpire' => '');
'queueexpire' => '',
'web_recode_to' => '');
my %valid_options = ();
map { $valid_options{$_}++; } @valid_options;
......@@ -211,7 +212,6 @@ my %Default_Conf =
'css_path' => '',## Defined below
'urlize_min_size' => 10240, ## 10Kb
'supported_lang' => 'de,cs,el,es,et_EE,en_US,fr,hu,it,ja_JP,nl,oc,pt_BR,sv,tr',
'web_recode_to' => '',
'default_remind_task' => '',
'update_db_field_types' => 'auto',
'logo_html_definition' => '',
......@@ -221,7 +221,8 @@ my %Default_Conf =
'pictures_feature' => 'off',
'use_blacklist' => 'send,subscribe',
'static_content_url' => '/static-sympa',
'static_content_path' => '--DIR--/static_content'
'static_content_path' => '--DIR--/static_content',
'filesystem_encoding' => 'utf-8'
);
......@@ -585,7 +586,7 @@ sub load_robots {
## CSS
$robot_conf->{$robot}{'css_url'} ||= $robot_conf->{$robot}{'static_content_url'}.'/css';
$robot_conf->{$robot}{'css_path'} ||= $Conf{'static_content_path'}.'/css/'.$robot;
$robot_conf->{$robot}{'sympa'} = $robot_conf->{$robot}{'email'}.'@'.$robot_conf->{$robot}{'host'};
$robot_conf->{$robot}{'request'} = $robot_conf->{$robot}{'email'}.'-request@'.$robot_conf->{$robot}{'host'};
$robot_conf->{$robot}{'cookie_domain'} ||= 'localhost';
......
......@@ -43,7 +43,7 @@ my %set_comment; #sets-of-messages comment
## The lang is the NLS catalogue name ; locale is the locale preference
## Ex: lang = fr ; locale = fr_FR
my ($current_lang, $current_locale, @previous_locale);
my ($current_lang, $current_locale, $current_charset, @previous_locale);
my $default_lang;
## This was the old style locale naming, used for templates, nls, scenario
my %language_equiv = ( 'zh_CN' => 'cn',
......@@ -102,8 +102,6 @@ my %locale2charset = ('cs_CZ' => 'utf-8',
'zh_TW' => 'big5',
);
my $recode;
sub GetSupportedLanguages {
my $robot = shift;
my @lang_list;
......@@ -183,25 +181,26 @@ sub SetLang {
## Define what catalog is used
&Locale::Messages::textdomain("sympa");
&Locale::Messages::bindtextdomain('sympa','--LOCALEDIR--');
&Locale::Messages::bind_textdomain_codeset('sympa',$recode) if $recode;
#bind_textdomain_codeset sympa => 'iso-8859-1';
$current_lang = $lang;
$current_locale = $locale;
$current_charset = gettext("_charset_");
return $locale;
}#SetLang
## Store the charset given as first argument in the file-scoped $recode
## variable. Calling it without argument stores undef, i.e. resets it.
sub set_recode {
    my ($charset) = @_;
    $recode = $charset;
}
## Return the current language, as last recorded in $current_lang
## (SetLang() assigns it).
sub GetLang {
    my $lang = $current_lang;
    return $lang;
}
## Return the current charset, as last recorded in $current_charset
## (SetLang() fills it with the "_charset_" catalog entry).
sub GetCharset {
    my $charset = $current_charset;
    return $charset;
}
sub Locale2Lang {
my $locale = shift;
my $lang;
......@@ -256,9 +255,7 @@ sub gettext {
return $language;
}
}elsif ($var eq 'charset') {
if ($recode) {
return $recode;
} elsif (/^Content-Type:\s*.*charset=(\S+)$/i) {
if (/^Content-Type:\s*.*charset=(\S+)$/i) {
return $1;
}
}elsif ($var eq 'encoding') {
......@@ -270,7 +267,11 @@ sub gettext {
return '';
}
&Locale::Messages::gettext(@_);
## Decode from catalog encoding
my $translation = &Encode::decode($current_charset, &Locale::Messages::gettext(@_));
return $translation;
}
1;
......
......@@ -11706,6 +11706,44 @@ sub upgrade {
}
}
## New mhonarc ressource file with utf-8 recoding
if (&tools::lower_version($previous_version, '5.3a.6')) {
&do_log('notice','Looking for customized mhonarc-ressources.tt2 files...');
foreach my $vr (keys %{$Conf::Conf{'robots'}}) {
my $etc_dir = $Conf::Conf{'etc'};
if ($vr ne $Conf::Conf{'host'}) {
$etc_dir .= '/'.$vr;
}
if (-f $etc_dir.'/mhonarc-ressources.tt2') {
my $new_filename = $etc_dir.'/mhonarc-ressources.tt2'.'.'.time;
rename $etc_dir.'/mhonarc-ressources.tt2', $new_filename;
&do_log('notice', "Custom %s file has been backed up as %s", $etc_dir.'/mhonarc-ressources.tt2', $new_filename);
&List::send_notify_to_listmaster('file_removed',$Conf::Conf{'host'},
[$etc_dir.'/mhonarc-ressources.tt2', $new_filename]);
}
}
&do_log('notice','Rebuilding web archives...');
my $all_lists = &List::get_lists('*');
foreach my $list ( @$all_lists ) {
next unless (defined $list->{'admin'}{'web_archive'});
my $file = $Conf{'queueoutgoing'}.'/.rebuild.'.$list->get_list_id();
unless (open REBUILD, ">$file") {
&do_log('err','Cannot create %s', $file);
next;
}
print REBUILD ' ';
close REBUILD;
}
}
return 1;
}
......@@ -12202,7 +12240,7 @@ sub _load_admin_file {
$admin{'defaults'}{$pname} = 1 unless ($::pinfo{$pname}{'internal'});
}
unless (open CONFIG, $config_file) {
unless (open CONFIG, "<:encoding($Conf{'filesystem_encoding'})", $config_file) {
&do_log('info', 'Cannot open %s', $config_file);
}
......@@ -12499,7 +12537,7 @@ sub _save_admin_file {
return undef;
}
unless (open CONFIG, ">$config_file") {
unless (open CONFIG, , ">:encoding($Conf{'filesystem_encoding'})", "$config_file") {
&do_log('info', 'Cannot open %s', $config_file);
return undef;
}
......
......@@ -118,6 +118,9 @@ default; -decode-
<TextClipFunc>
MHonArc::UTF8::clip; MHonArc/UTF8.pm
</TextClipFunc>
<TextEncode>
utf-8; MHonArc::UTF8::to_utf8; MHonArc/UTF8.pm
</TextEncode>
<!-- ============================================================== -->
<!-- Options -->
......
......@@ -25,6 +25,7 @@ package Sympa::Template::Compat;
use strict;
use base 'Template::Provider';
use Encode;
my @other_include_path;
my $allow_absolute;
......@@ -150,6 +151,30 @@ sub escape_quote {
return $string;
}
## TT2 filter: turn a Perl character string into utf-8 encoded bytes.
## IN  : a string
## OUT : the utf-8 encoded bytes if the string carries Perl's internal
##       utf8 flag ; the string itself, unchanged, otherwise
## NOTE(review): the file also has a plain "use Encode;", which by default
## exports a function with this same name -- confirm that this local
## definition is the one meant to be used.
sub encode_utf8 {
    my ($text) = @_;

    ## Strings without the internal utf8 flag are passed through untouched
    return $text unless Encode::is_utf8($text);

    return Encode::encode_utf8($text);
}
## TT2 filter: turn utf-8 encoded bytes into a Perl character string.
## IN  : a string
## OUT : the string itself, unchanged, if it already carries Perl's
##       internal utf8 flag ; the utf-8 decoded string otherwise
## NOTE(review): the file also has a plain "use Encode;", which by default
## exports a function with this same name -- confirm that this local
## definition is the one meant to be used.
sub decode_utf8 {
    my ($text) = @_;

    ## Already flagged as utf8 internally : nothing to decode
    return $text if Encode::is_utf8($text);

    return Encode::decode_utf8($text);
}
sub maketext {
my ($context, @arg) = @_;
......@@ -202,13 +227,6 @@ sub parse_tt2 {
$template = \join('', @$template);
}
# Do we need to recode strings
# maketext will check the $recode variable
if (defined $options &&
$options->{'recode'}) {
&Language::set_recode( $options->{'recode'});
}
# quick hack! wrong layer!
# s|^/home/sympa/bin/etc/wws_templates/(.*?)(\...)?(\.tpl)|$1.tt2|
# for values %$data;
......@@ -230,6 +248,8 @@ sub parse_tt2 {
escape_xml => [\&escape_xml, 0],
escape_url => [\&escape_url, 0],
escape_quote => [\&escape_quote, 0],
decode_utf8 => [\&decode_utf8, 0],
encode_utf8 => [\&encode_utf8, 0]
}
};
......@@ -245,15 +265,10 @@ sub parse_tt2 {
&do_log('err', 'Failed to parse %s : %s', $template, $tt2->error());
&do_log('err', 'Looking for TT2 files in %s', join(',',@{$include_path}));
# Reset $recode
&Language::set_recode();
return undef;
}
# Reset $recode
&Language::set_recode();
return 1;
}
......
<!-- $Id$ -->
<!-- begin : this part is generated by mhonarc using mhonarc-ressources.tt2 -->
[% PROCESS $file IF file %]
[% PROCESS $file %]
<!-- end of part generated by mhonarc -->
<?xml version="1.0" encoding="[%|loc%]_charset_[%END%]" ?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<?xml version="1.0" encoding="utf-8" ?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xml:lang="[% iso639 = locale.match('^(.)(.)'); iso639.0; iso639.1 %]">
<!-- begin main.tt2 (distrib) -->
......@@ -27,11 +27,7 @@
<!-- <link rel="alternative stylesheet" href="[% css_url %]/print-preview.css" type="text/css" media="screen" title="Print Preview" /> -->
<meta name="generator" content="Sympa" />
[% IF conf.web_recode_to %]
<meta http-equiv="content-type" content="text/html; charset=[% conf.web_recode_to %]" />
[% ELSE %]
<meta http-equiv="content-type" content="text/html; charset=[%|loc%]_charset_[%END%]" />
[% END %]
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
[% PROCESS javascript.tt2 %]
......
......@@ -2,7 +2,7 @@
<html>
<head>
<meta http-equiv="content-type" content="text/html; charset=[%|loc%]_charset_[%END%]" />
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<style type="text/css" title="style sympa">
......
......@@ -53,6 +53,8 @@ use admin;
use SharedDocument;
use report;
 
use open ':utf8'; ## Default is to consider files utf8
use Mail::Header;
use Mail::Address;
 
......@@ -583,6 +585,13 @@ if ($wwsconf->{'use_fast_cgi'}) {
my $all_lists = &List::get_lists('*') unless ($maintenance_mode);
}
 
## Set output encoding
## All outgoing strings will be recoded transparently using this charset
binmode STDOUT, ":utf8";
## Incoming data is utf8-encoded
binmode STDIN, ":utf8";
## Main loop
my $loop_count;
my $start_time = &POSIX::strftime("%d %b %Y at %H:%M:%S", localtime(time));
......@@ -678,7 +687,7 @@ if ($wwsconf->{'use_fast_cgi'}) {
$param->{'conf'} = {};
foreach my $p ('email','host','sympa','request','soap_url','wwsympa_url','listmaster_email','logo_html_definition',
'dark_color','light_color','text_color','bg_color','error_color','use_blacklist',
'selected_color','shaded_color','web_recode_to','color_0','color_1','color_2','color_3','color_4','color_5','color_6','color_7','color_8','color_9','color_10','color_11','color_12','color_13','color_14','color_15') {
'selected_color','shaded_color','color_0','color_1','color_2','color_3','color_4','color_5','color_6','color_7','color_8','color_9','color_10','color_11','color_12','color_13','color_14','color_15') {
$param->{'conf'}{$p} = &Conf::get_robot_conf($robot, $p);
$param->{$p} = &Conf::get_robot_conf($robot, $p) if (($p =~ /_color$/)|| ($p =~ /color_/));
}
......@@ -904,6 +913,7 @@ if ($wwsconf->{'use_fast_cgi'}) {
&do_maintenance();
$param->{'action'} = 'maintenance';
}else {
## Session loop
while ($action) {
unless (&check_param_in()) {
......@@ -922,7 +932,8 @@ if ($wwsconf->{'use_fast_cgi'}) {
$param->{'lang'} = $param->{'cookie_lang'} || $param->{'user_lang'} ||
$param->{'list_lang'} || &Conf::get_robot_conf($robot, 'lang');
$param->{'list_lang'} || &Conf::get_robot_conf($robot, 'lang');
$param->{'locale'} = &Language::SetLang($param->{'lang'});
&export_topics ($robot);
......@@ -1082,14 +1093,12 @@ if ($wwsconf->{'use_fast_cgi'}) {
## Available languages
my $saved_lang = &Language::GetLang();
 
# Recode the language strings to the correct codeset
&Language::set_recode ($Conf{'web_recode_to'} || &Language::gettext('_charset_'));
foreach my $l (@{&Language::GetSupportedLanguages($robot)}) {
&Language::SetLang($l) || next;
 
if (gettext("_charset_")) {
$param->{'languages'}{$l}{'complete'} = &Encode::decode(gettext("_charset_"), gettext("_language_"));
if (gettext("_language_")) {
$param->{'languages'}{$l}{'complete'} = gettext("_language_");
}else {
$param->{'languages'}{$l}{'complete'} = $l;
}
......@@ -1100,7 +1109,7 @@ if ($wwsconf->{'use_fast_cgi'}) {
$param->{'languages'}{$l}{'selected'} = '';
}
}
&Language::set_recode (); ## Unset recoding
&Language::SetLang($saved_lang);
 
# if bypass is defined select the content-type from various vars
......@@ -1153,8 +1162,7 @@ if ($wwsconf->{'use_fast_cgi'}) {
}elsif ($rss) {
## Send RSS
print "Cache-control: no-cache\n";
my $charset = gettext("_charset_");
print "Content-Type: application/rss+xml; charset=$charset\n\n";
print "Content-Type: application/rss+xml; charset=utf-8\n\n";
## Icons
$param->{'icons_url'} = $wwsconf->{'icons_url'};
......@@ -1169,19 +1177,15 @@ if ($wwsconf->{'use_fast_cgi'}) {
my $lang = &Language::Lang2Locale($param->{'lang'});
my $tt2_include_path = &tools::make_tt2_include_path($robot,'web_tt2',$lang,$list);
## Recode to utf-8 for RSS
my $tt2_options = {};
if ($Conf{'web_recode_to'}) {
$tt2_options = {'recode' => $Conf{'web_recode_to'}};
}
unless (&tt2::parse_tt2($param,'rss.tt2' ,\*STDOUT, $tt2_include_path, $tt2_options)) {
unless (&tt2::parse_tt2($param,'rss.tt2' ,\*STDOUT, $tt2_include_path, {})) {
my $error = &tt2::get_error();
$param->{'tt2_error'} = $error;
unless (&List::send_notify_to_listmaster('web_tt2_error', $robot, [$error])) {
&wwslog('notice','Unable to send notify "web_tt2_error" to listmaster');
}
}
# close FILE;
}elsif ($param->{'redirect_to'}) {
do_log ('debug',"Redirecting to $param->{'redirect_to'}");
......@@ -1422,11 +1426,7 @@ sub get_header_field {
$in{$p} =~ s/\015//g;
 
## Convert from the web encoding to unicode string
if ($Conf{'web_recode_to'}){
if (require "Encode.pm") {
$p = &Encode::decode($Conf{'web_recode_to'}, $p);
}
}
$in{$p} = &Encode::decode('utf-8', $in{$p});
 
my @tokens = split /\./, $p;
my $pname = $tokens[0];
......@@ -1485,18 +1485,15 @@ sub send_html {
my $lang = &Language::Lang2Locale($param->{'lang'});
my $tt2_include_path = &tools::make_tt2_include_path($robot,'web_tt2',$lang,$list);
my $tt2_options = {};
if ($Conf{'web_recode_to'}) {
$tt2_options = {'recode' => $Conf{'web_recode_to'}};
}
unless (&tt2::parse_tt2($param,$tt2_file , \*STDOUT, $tt2_include_path, $tt2_options)) {
unless (&tt2::parse_tt2($param,$tt2_file , \*STDOUT, $tt2_include_path, {})) {
my $error = &tt2::get_error();
$param->{'tt2_error'} = $error;
&List::send_notify_to_listmaster('web_tt2_error', $robot, [$error]);
&tt2::parse_tt2($param,'tt2_error.tt2' , \*STDOUT, $tt2_include_path);
}
}
 
sub prepare_report_user {
......@@ -6283,12 +6280,11 @@ sub do_viewmod {
$param->{'bypass'} = 1;
}
 
$param->{'file'} = $arc_file_path;
}else {
if ($in{'arc_file'} =~ /^(msg\d+)\.html$/) {
# Get subject message thanks to X-Subject field (<!--X-Subject: x -->)
open (FILE, $arc_file_path);
open (FILE, '<:utf8', $arc_file_path);
while (<FILE>) {
if (/<!--X-Subject: (.+) -->/) {
$param->{'subtitle'} = $1;
......@@ -6296,12 +6292,16 @@ sub do_viewmod {
}
}
close FILE;
}
&tt2::add_include_path($arc_month_path);
$param->{'file'} = $in{'arc_file'};
}
 
## Provide a filehandle to the TT2 parser (instead of a filename previously)
## It allows to set the appropriate utf8 binmode on the FH
open $param->{'file'}, '<:utf8', $arc_file_path;
my @stat = stat ($arc_file_path);
$param->{'date'} = $stat[9];
 
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment