Commit f71abf8d authored by IKEDA Soji's avatar IKEDA Soji
Browse files

- WWSympa: Since CGI of some HTTP servers might split script-path and

  extra-path of script-URI improperly, we had better reconstruct them:
  SCRIPT_NAME and PATH_INFO.
  Note that we shouldn't use non-standard CGI environment variables such as
  REQUEST_URI.
- Additional environment variable SYMPA_DOMAIN stands for available
  mail domain (a.k.a. "robot").
- If no robot providing web service was found according to client's request,
  error response will be returned.
parent 544db3fe
......@@ -38,7 +38,6 @@ use strict;
use lib split(/:/, $ENV{SYMPALIB} || ''), '--modulesdir--';
 
use Archive::Zip qw();
use CGI::Fast qw();
use DateTime;
use DateTime::Format::Mail;
use Digest::MD5;
......@@ -92,6 +91,7 @@ use Sympa::Tools::Text;
use Sympa::Tracking;
use Sympa::User;
use Sympa::WWW::Auth;
use Sympa::WWW::FastCGI;
use Sympa::WWW::Marc::Search;
use Sympa::WWW::Report;
use Sympa::WWW::Session;
......@@ -1050,7 +1050,7 @@ $log->syslog('info', 'WWSympa started, process %d', $PID);
# Main loop.
my $loop_count = 0;
my $start_time = time;
while ($query = CGI::Fast->new) {
while ($query = Sympa::WWW::FastCGI->new) {
$loop_count++;
 
undef $param;
......@@ -1111,13 +1111,10 @@ while ($query = CGI::Fast->new) {
%in = $query->Vars;
 
# Determine robot.
# N.B. As of 6.2.15, the http_host parameter will match with the host name
# and path locally detected by server. If remotely detected host name
# and / or path should differ, the proxy must adjust them.
# N.B. As of 6.2.34, wwsympa_url parameter may be optional.
$robot = Sympa::WWW::Tools::get_robot('http_host', 'wwsympa_url');
unless (Conf::get_robot_conf($robot, 'wwsympa_url')) {
print "Status: 404 Not Found\n";
$robot = $ENV{SYMPA_DOMAIN};
unless ($robot) {
# No robot providing web service found.
print "Status: 421 Misdirected Request\n";
print "\n\n";
next;
}
......
......@@ -188,6 +188,7 @@ nobase_modules_DATA = \
Sympa/Upgrade.pm \
Sympa/User.pm \
Sympa/WWW/Auth.pm \
Sympa/WWW/FastCGI.pm \
Sympa/WWW/Marc.pm \
Sympa/WWW/Marc/Search.pm \
Sympa/WWW/Report.pm \
......
# -*- indent-tabs-mode: nil; -*-
# vim:ft=perl:et:sw=4
# Sympa - SYsteme de Multi-Postage Automatique
#
# Copyright 2020 The Sympa Community. See the AUTHORS.md
# file at the top-level directory of this distribution and at
# <https://github.com/sympa-community/sympa.git>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
package Sympa::WWW::FastCGI;
use strict;
use warnings;
use base qw(CGI::Fast);
use Sympa::WWW::Tools;
# Constructor: obtains the request object from CGI::Fast, then publishes the
# mail domain (a.k.a. "robot") serving that request through the environment.
#
# Parameters:
#   $class - class name (or subclass) the method is invoked on.
#   @args  - passed through unchanged to the superclass constructor.
#
# Returns:
#   Whatever the superclass constructor returned.  When it returned no
#   object, that value is handed back as is and %ENV is left untouched.
#
# Side effects (only when a request object was obtained):
#   - If Sympa::WWW::Tools::get_robot() yields a non-empty list, its first
#     three elements are stored into $ENV{SYMPA_DOMAIN}, $ENV{SCRIPT_NAME}
#     and $ENV{PATH_INFO}, in that order.
#   - Otherwise $ENV{SYMPA_DOMAIN} is removed so that a value left over
#     from a previous request cannot be mistaken for the current one.
sub new {
    my ($class, @args) = @_;

    my $self = $class->SUPER::new(@args);

    # No request object means there is nothing to dispatch: skip the robot
    # lookup and do not rewrite the environment.
    return $self unless defined $self;

    # Determine the mail domain (a.k.a. "robot") the request is dispatched to.
    # N.B. As of 6.2.15, the http_host parameter will match with the host
    # name and path locally detected by server.  If remotely detected host
    # name and / or path should differ, the proxy must adjust them.
    # N.B. As of 6.2.34, wwsympa_url parameter may be optional.
    my @vars = Sympa::WWW::Tools::get_robot('http_host', 'wwsympa_url');

    if (@vars) {
        @ENV{qw(SYMPA_DOMAIN SCRIPT_NAME PATH_INFO)} = @vars;
    } else {
        delete $ENV{SYMPA_DOMAIN};
    }

    return $self;
}
1;
__END__
=encoding utf-8
=head1 NAME
Sympa::WWW::FastCGI - CGI Interface for FastCGI of Sympa
=head1 SYNOPSIS
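  use Sympa::WWW::FastCGI;

  while (my $query = Sympa::WWW::FastCGI->new) {
      my $robot = $ENV{SYMPA_DOMAIN};
      ...
  }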
=head1 DESCRIPTION
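L<Sympa::WWW::FastCGI> is a subclass of L<CGI::Fast>.
Its new() constructor calls the constructor of the superclass, then looks up
the mail domain (a.k.a. "robot") serving the request with
Sympa::WWW::Tools::get_robot().
If a mail domain was found, the environment variables C<SYMPA_DOMAIN>,
C<SCRIPT_NAME> and C<PATH_INFO> are set from the result of the lookup.
Otherwise C<SYMPA_DOMAIN> is removed from the environment.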
=head1 SEE ALSO
L<CGI::Fast>.
RFC 3875, The Common Gateway Interface (CGI) Version 1.1.
L<https://tools.ietf.org/html/rfc3875>.
=head1 HISTORY
L<Sympa::WWW::FastCGI> appeared on Sympa 6.2.55b.
=cut
......@@ -33,6 +33,7 @@ use Digest::MD5;
use English qw(-no_match_vars);
use File::Path qw();
use URI;
use URI::Escape qw();
use Sympa;
use Conf;
......@@ -227,65 +228,78 @@ sub get_my_url {
sub get_robot {
my @keys = @_;
my $request_host = _get_server_name();
my $request_path = $ENV{'REQUEST_URI'} || '';
my $robot_id;
if (defined $request_host and length $request_host) {
my $selected_path = '';
foreach my $rid (Sympa::List::get_robots()) {
my $local_url;
foreach my $key (@keys) {
$local_url = Conf::get_robot_conf($rid, $key);
last if $local_url;
}
next unless $local_url;
if ($local_url =~ m{\A[-+\w]+:}) {
;
} elsif ($local_url =~ m{\A//}) {
$local_url = 'http:' . $local_url;
} else {
$local_url = 'http://' . $local_url;
}
# Get host part of script-URI from standard CGI environment variable
# SERVER_NAME.
# NOTE: As of 6.2.15, less trustworthy "X-Forwarded-Server:" request field
# is _no longer_ referred and this function returns only locally detected
# server name.
my $request_host = lc($ENV{SERVER_NAME} // '');
return unless length $request_host;
my $ipv6_re = Sympa::Regexps::ipv6();
if ($request_host =~ /\A$ipv6_re\z/) { # IPv6 address
$request_host = sprintf '[%s]', $request_host;
}
# Since CGI of some HTTP servers might split script-path and extra-path of
# script-URI improperly, we had better reconstruct them from these
# standard CGI environment variables:
# - SCRIPT_NAME: a URI path which could identify the CGI script.
# - PATH_INFO: derived from the portion of the URI path hierarchy
# following the part that identifies the script itself.
# Note that they are not URL-encoded, unlike non-standard REQUEST_URI.
my $org_script_name = $ENV{SCRIPT_NAME} // '';
my $org_path_info = $ENV{PATH_INFO} // '';
return unless '' eq $org_script_name or 0 == index $org_script_name, '/';
return unless '' eq $org_path_info or 0 == index $org_path_info, '/';
my $request_path = $org_script_name . $org_path_info;
# Find mail domain (a.k.a. "robot") of which web URL matches script-URI.
my ($robot_id, $script_path) = (undef, '');
foreach my $rid (Sympa::List::get_robots()) {
my $local_url;
foreach my $key (@keys) {
$local_url = Conf::get_robot_conf($rid, $key);
last if $local_url;
}
next unless $local_url;
my $uri = URI->new($local_url);
next
unless $uri
and $uri->scheme
and grep { $uri->scheme eq $_ } qw(http https);
my $host = lc($uri->host || '');
my $path = $uri->path || '/';
#FIXME:might need percent-decode hosts and/or paths
next
unless $request_host eq $host
and 0 == index $request_path, $path;
# The longest path wins.
($robot_id, $selected_path) = ($rid, $path)
if length $selected_path < length $path;
if ($local_url =~ m{\A[-+\w]+:}) {
;
} elsif ($local_url =~ m{\A//}) {
$local_url = 'http:' . $local_url;
} else {
$local_url = 'http://' . $local_url;
}
}
return (defined $robot_id) ? $robot_id : $Conf::Conf{'domain'};
}
my $uri = URI->new($local_url);
next
unless $uri
and $uri->scheme
and grep { $uri->scheme eq $_ } qw(http https);
# Old name: (part of) get_header_field() in wwsympa.fcgi.
# NOTE: As of 6.2.15, less trustworthy "X-Forwarded-Server:" request field is
# _no longer_ referred and this function returns only locally detected server
# name.
sub _get_server_name {
my $server = $ENV{SERVER_NAME};
return undef unless defined $server and length $server;
my $host = lc URI::Escape::uri_unescape($uri->host // '');
my $path = URI::Escape::uri_unescape($uri->path // '/');
next unless $request_host eq $host;
next
unless $request_path eq $path
or 0 == index($request_path, $path . '/');
my $ipv6_re = Sympa::Regexps::ipv6();
if ($server =~ /\A$ipv6_re\z/) { # IPv6 address
$server = "[$server]";
# The longest path wins.
($robot_id, $script_path) = ($rid, $path)
if length $script_path < length $path;
}
return lc $server;
return unless $robot_id;
return
wantarray
? ($robot_id, $script_path, substr $request_path, length $script_path)
: $robot_id;
}
# Old name: (part of) get_header_field() in wwsympa.fcgi.
# No longer used.
#sub _get_server_name;
# Old name: (part of) get_header_field() in wwsympa.fcgi.
# NOTE: As of 6.2.15, less trustworthy "X-Forwarded-Host:" request field is
# _no longer_ referred and this function returns only locally detected host
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment