Unverified Commit d2a043dd authored by IKEDA Soji's avatar IKEDA Soji Committed by GitHub
Browse files

Merge pull request #910 from ikedas/issue-879_trial-2 by ikedas

Split request URI into SCRIPT_NAME and PATH_INFO by Sympa itself
parents 7dee23b6 c23bd70a
......@@ -42,6 +42,7 @@ check_SCRIPTS = \
t/Tools_SMIME.t \
t/Tools_Text.t \
t/Tools_Time.t \
t/WWW_Tools.t \
t/compile_executables.t \
t/compile_modules.t \
t/compile_scenarios.t \
......
......@@ -38,7 +38,6 @@ use strict;
use lib split(/:/, $ENV{SYMPALIB} || ''), '--modulesdir--';
 
use Archive::Zip qw();
use CGI::Fast qw();
use DateTime;
use DateTime::Format::Mail;
use Digest::MD5;
......@@ -92,6 +91,7 @@ use Sympa::Tools::Text;
use Sympa::Tracking;
use Sympa::User;
use Sympa::WWW::Auth;
use Sympa::WWW::FastCGI;
use Sympa::WWW::Marc::Search;
use Sympa::WWW::Report;
use Sympa::WWW::Session;
......@@ -1049,7 +1049,7 @@ $log->syslog('info', 'WWSympa started, process %d', $PID);
# Main loop.
my $loop_count = 0;
my $start_time = time;
while ($query = CGI::Fast->new) {
while ($query = Sympa::WWW::FastCGI->new) {
$loop_count++;
 
undef $param;
......@@ -1106,17 +1106,23 @@ while ($query = CGI::Fast->new) {
## Though I don't know why, __DIE__ handler is cleared after INIT.
Sympa::Crash::register_handler();
 
foreach my $envvar (
qw(ORIG_PATH_INFO ORIG_SCRIPT_NAME
PATH_INFO QUERY_STRING REMOTE_ADDR REMOTE_HOST REQUEST_METHOD
SCRIPT_NAME SERVER_NAME SERVER_PORT
SYMPA_DOMAIN)
) {
$log->syslog('debug', '%s=%s', $envvar, $ENV{$envvar});
}
## Get params in a hash
%in = $query->Vars;
 
# Determine robot.
# N.B. As of 6.2.15, the http_host parameter will match with the host name
# and path locally detected by server. If the remotely detected host name
# and / or path differ, the proxy must adjust them.
# N.B. As of 6.2.34, wwsympa_url parameter may be optional.
$robot = Sympa::WWW::Tools::get_robot('http_host', 'wwsympa_url');
unless (Conf::get_robot_conf($robot, 'wwsympa_url')) {
print "Status: 404 Not Found\n";
$robot = $ENV{SYMPA_DOMAIN};
unless ($robot) {
# No robot providing web service found.
print "Status: 421 Misdirected Request\n";
print "\n\n";
next;
}
......@@ -1940,8 +1946,6 @@ sub _crash_handler {
sub _split_params {
my $args_string = shift;
 
$log->syslog('debug', "PATH_INFO: %s", $ENV{'PATH_INFO'});
$args_string =~ s+^/++;
 
my $ending_slash = 0;
......@@ -3857,7 +3861,7 @@ sub do_help {
# Strip extensions.
$in{'help_topic'} =~ s/[.].*// if $in{'help_topic'};
# Given partial top URI, redirect to base.
unless ($in{'help_topic'} or $ENV{REQUEST_URI} =~ /\/\z/) {
unless ($in{'help_topic'} or ($ENV{PATH_INFO} // '') =~ m{/\z}) {
$param->{'redirect_to'} = Sympa::get_url(
$robot, 'help',
nomenu => $param->{'nomenu'},
......@@ -8645,7 +8649,7 @@ sub do_arc {
);
return 1;
}
unless ($in{'arc_file'} or $ENV{REQUEST_URI} =~ /\/\z/) {
unless ($in{'arc_file'} or ($ENV{PATH_INFO} // '') =~ m{/\z}) {
$param->{'redirect_to'} = Sympa::get_url(
$list, 'arc',
nomenu => $param->{'nomenu'},
......
......@@ -189,6 +189,7 @@ nobase_modules_DATA = \
Sympa/Upgrade.pm \
Sympa/User.pm \
Sympa/WWW/Auth.pm \
Sympa/WWW/FastCGI.pm \
Sympa/WWW/Marc.pm \
Sympa/WWW/Marc/Search.pm \
Sympa/WWW/Report.pm \
......
# -*- indent-tabs-mode: nil; -*-
# vim:ft=perl:et:sw=4
# Sympa - SYsteme de Multi-Postage Automatique
#
# Copyright 2020 The Sympa Community. See the AUTHORS.md
# file at the top-level directory of this distribution and at
# <https://github.com/sympa-community/sympa.git>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
package Sympa::WWW::FastCGI;
use strict;
use warnings;
use base qw(CGI::Fast);
use Sympa::WWW::Tools;
# Constructor.
# Obtains a request object from the parent class (CGI::Fast), then
# determines the mail domain (a.k.a. "robot") the request is dispatched to
# and adjusts the CGI environment accordingly:
#   - If a mail domain is found, the current SCRIPT_NAME and PATH_INFO are
#     saved as ORIG_SCRIPT_NAME and ORIG_PATH_INFO, and then SYMPA_DOMAIN,
#     SCRIPT_NAME and PATH_INFO are set from the values returned by
#     Sympa::WWW::Tools::get_robot().
#   - Otherwise SYMPA_DOMAIN is removed from the environment.
# Parameters: passed through to the constructor of the parent class as is.
# Returns: whatever the constructor of the parent class returned.
sub new {
    my $class = shift;
    my @args  = @_;

    my $self = $class->SUPER::new(@args);
    # If the parent class gave no request object (the caller's loop is about
    # to end), there is no request to inspect: hand the false value back
    # without looking up the robot nor touching the environment.
    return $self unless $self;

    # Determine mail domain (a.k.a. "robot") the request is dispatched.
    # N.B. As of 6.2.15, the http_host parameter will match with the host name
    # and path locally detected by server. If the remotely detected host name
    # and / or path differ, the proxy must adjust them.
    # N.B. As of 6.2.34, wwsympa_url parameter may be optional.
    my @vars = Sympa::WWW::Tools::get_robot('http_host', 'wwsympa_url');

    if (@vars) {
        # Keep the values given by the HTTP server before overwriting them.
        @ENV{qw(ORIG_SCRIPT_NAME ORIG_PATH_INFO)} =
            @ENV{qw(SCRIPT_NAME PATH_INFO)};
        @ENV{qw(SYMPA_DOMAIN SCRIPT_NAME PATH_INFO)} = @vars;
    } else {
        # No robot matched: never let a SYMPA_DOMAIN value which came along
        # with the environment be taken for a detected one.
        delete $ENV{SYMPA_DOMAIN};
    }

    return $self;
}
1;
__END__
=encoding utf-8
=head1 NAME
Sympa::WWW::FastCGI - CGI Interface for FastCGI of Sympa
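=head1 SYNOPSIS

  use Sympa::WWW::FastCGI;

  while (my $query = Sympa::WWW::FastCGI->new) {
      # Handle a request, referring to $ENV{SYMPA_DOMAIN},
      # $ENV{SCRIPT_NAME} and $ENV{PATH_INFO}.
  }

=head1 DESCRIPTION

L<Sympa::WWW::FastCGI> is a subclass of L<CGI::Fast>.
Its constructor new() looks up the mail domain (a.k.a. "robot") the request
is dispatched to, using get_robot() in L<Sympa::WWW::Tools>.
If a mail domain is found, the values of C<SCRIPT_NAME> and C<PATH_INFO>
environment variables are saved as C<ORIG_SCRIPT_NAME> and
C<ORIG_PATH_INFO>, and then C<SYMPA_DOMAIN>, C<SCRIPT_NAME> and
C<PATH_INFO> are set according to the result.
Otherwise C<SYMPA_DOMAIN> is removed from the environment.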
=head1 SEE ALSO
L<CGI::Fast>.
RFC 3875, The Common Gateway Interface (CGI) Version 1.1.
L<https://tools.ietf.org/html/rfc3875>.
=head1 HISTORY
L<Sympa::WWW::FastCGI> appeared on Sympa 6.2.55b.
=cut
......@@ -33,6 +33,7 @@ use Digest::MD5;
use English qw(-no_match_vars);
use File::Path qw();
use URI;
use URI::Escape qw();
use Sympa;
use Conf;
......@@ -201,38 +202,47 @@ sub get_my_url {
my $robot = shift;
my %options = @_;
my $original_path_info;
my $path_info = $ENV{PATH_INFO} // '';
my $query_string = $ENV{QUERY_STRING} // '';
# Try getting encoded PATH_INFO and query.
my $request_uri = $ENV{REQUEST_URI} || '';
my $script_name = $ENV{SCRIPT_NAME} || '';
if ( $request_uri eq $script_name
or 0 == index($request_uri, $script_name . '?')
or 0 == index($request_uri, $script_name . '/')) {
$original_path_info = substr($request_uri, length $script_name);
} else {
# Workaround: Encode PATH_INFO again and use it.
my $path_info = $ENV{PATH_INFO} || '';
my $query_string = $ENV{QUERY_STRING};
$original_path_info =
Sympa::Tools::Text::encode_uri($path_info, omit => '/')
. ($query_string ? ('?' . $query_string) : '');
}
return Sympa::get_url($robot, undef, authority => $options{authority})
. $original_path_info;
return
Sympa::get_url($robot, undef, authority => $options{authority})
. Sympa::Tools::Text::encode_uri($path_info, omit => '/')
. (length $query_string ? '?' : '')
. $query_string;
}
# Determine robot.
sub get_robot {
my @keys = @_;
my $request_host = _get_server_name();
my $request_path = $ENV{'REQUEST_URI'} || '';
my $robot_id;
if (defined $request_host and length $request_host) {
my $selected_path = '';
# Get host part of script-URI from standard CGI environment variable
# SERVER_NAME.
# NOTE: As of 6.2.15, less trustworthy "X-Forwarded-Server:" request field
# is _no longer_ referred and this function returns only locally detected
# server name.
my $request_host = lc($ENV{SERVER_NAME} // '');
return unless length $request_host;
my $ipv6_re = Sympa::Regexps::ipv6();
if ($request_host =~ /\A$ipv6_re\z/) { # IPv6 address
$request_host = sprintf '[%s]', $request_host;
}
# Since CGI of some HTTP servers might split script-path and extra-path of
# script-URI improperly, we had better reconstruct them from these
# standard CGI environment variables:
# - SCRIPT_NAME: a URI path which could identify the CGI script.
# - PATH_INFO: derived from the portion of the URI path hierarchy
# following the part that identifies the script itself.
# Note that they are not URL-encoded, unlike non-standard REQUEST_URI.
my $org_script_name = $ENV{SCRIPT_NAME} // '';
my $org_path_info = $ENV{PATH_INFO} // '';
return unless '' eq $org_script_name or 0 == index $org_script_name, '/';
return unless '' eq $org_path_info or 0 == index $org_path_info, '/';
my $request_path = $org_script_name . $org_path_info;
# Find mail domain (a.k.a. "robot") of which web URL matches script-URI.
my ($robot_id, $script_path) = (undef, undef);
foreach my $rid (Sympa::List::get_robots()) {
my $local_url;
foreach my $key (@keys) {
......@@ -255,36 +265,29 @@ sub get_robot {
and $uri->scheme
and grep { $uri->scheme eq $_ } qw(http https);
my $host = lc($uri->host || '');
my $path = $uri->path || '/';
#FIXME:might need percent-decode hosts and/or paths
my $host = lc URI::Escape::uri_unescape($uri->host // '');
my $path = URI::Escape::uri_unescape($uri->path // '');
next unless $request_host eq $host;
next
unless $request_host eq $host
and 0 == index $request_path, $path;
unless $request_path eq $path
or 0 == index $request_path, $path . '/';
# The longest path wins.
($robot_id, $selected_path) = ($rid, $path)
if length $selected_path < length $path;
}
($robot_id, $script_path) = ($rid, $path)
if not defined $script_path
or length $script_path < length $path;
}
return (defined $robot_id) ? $robot_id : $Conf::Conf{'domain'};
return unless $robot_id;
return
wantarray
? ($robot_id, $script_path, substr $request_path, length $script_path)
: $robot_id;
}
# Old name: (part of) get_header_field() in wwsympa.fcgi.
# NOTE: As of 6.2.15, less trustworthy "X-Forwarded-Server:" request field is
# _no longer_ referred and this function returns only locally detected server
# name.
sub _get_server_name {
    # Only the locally detected SERVER_NAME is consulted.
    my $name = $ENV{SERVER_NAME};
    return undef if !defined $name || $name eq '';

    # An IPv6 address is enclosed in square brackets; anything else is used
    # as is.  The result is always lowercased.
    my $ipv6 = Sympa::Regexps::ipv6();
    return lc($name =~ /\A$ipv6\z/ ? sprintf('[%s]', $name) : $name);
}
# No longer used.
#sub _get_server_name;
# Old name: (part of) get_header_field() in wwsympa.fcgi.
# NOTE: As of 6.2.15, less trustworthy "X-Forwarded-Host:" request field is
......
# -*- indent-tabs-mode: nil; -*-
# vim:ft=perl:et:sw=4
use strict;
use warnings;
use English qw(-no_match_vars);
use File::Path qw(make_path rmtree);
use Test::More;
BEGIN {
use_ok 'Sympa::WWW::Tools';
}
# get_robot()

# Minimal configuration for the tests: a single mail domain with its web
# URL, and a scratch directory used as the configuration directory.
%Conf::Conf = (
    domain      => 'mail.example.org',
    listmaster  => 'listmaster@example.org',
    wwsympa_url => 'http://web.example.org/sym/pa',
    etc         => 't/tmp/etc',
);

# make_path() returns the number of directories it actually created, which
# is zero if the directory is left over from an earlier (aborted) run.
# So the outcome is checked by testing for the directory itself, not by
# the return value.
make_path($Conf::Conf{'etc'});
-d $Conf::Conf{'etc'}
    or die sprintf 'Cannot create %s: %s', $Conf::Conf{'etc'}, $ERRNO;
# Runs get_robot('wwsympa_url') in list context for each of the given cases
# and compares the result with the expectation.
# Each case is an array reference:
#   [SERVER_NAME, SCRIPT_NAME, PATH_INFO, expected result, test name]
# An undefined PATH_INFO is assigned to the environment as it is.
my $run_cases = sub {
    foreach my $case (@_) {
        my ($server_name, $script_name, $path_info, $expected, $label) =
            @{$case};

        @ENV{qw(SERVER_NAME SCRIPT_NAME PATH_INFO)} =
            ($server_name, $script_name, $path_info);
        is_deeply [Sympa::WWW::Tools::get_robot('wwsympa_url')], $expected,
            $label;
    }
};

# Cases checked against the wwsympa_url being in effect at this point.
$run_cases->(
    [   'web.example.org', '/sym/pa', undef,
        ['mail.example.org', '/sym/pa', ''],
        'SCRIPT_NAME & empty PATH_INFO'
    ],
    [   'web.example.org', '/sym/pa', '/help',
        ['mail.example.org', '/sym/pa', '/help'],
        'SCRIPT_NAME & non-empty PATH_INFO'
    ],
    [   'web.example.org', '/sym', '/pa/help',
        ['mail.example.org', '/sym/pa', '/help'],
        'split script-path (e.g. mod_proxy_fcgi on httpd)'
    ],
    [   'web.example.org', '/sym/pa/help', undef,
        ['mail.example.org', '/sym/pa', '/help'],
        'no PATH_INFO (e.g. nginx without fastcgi_split_path_info)'
    ],
    [   'other.example.org', '/sym/pa', '/help',
        [],
        'mismatch SERVER_NAME'
    ],
    [   'web.example.org', '/sympa', '/help',
        [],
        'mismatch SCRIPT_NAME'
    ],
    [   'web.example.org', 'sym/pa', '/help',
        [],
        'dubious SCRIPT_NAME'
    ],
    [   'web.example.org', '/sym/pa/', 'help',
        [],
        'dubious PATH_INFO'
    ],
);

# Cases with the web URL placed on the top of the host (no path component).
$Conf::Conf{wwsympa_url} = 'http://web.example.org';
$run_cases->(
    [   'web.example.org', '', '/help',
        ['mail.example.org', '', '/help'],
        'URL prefix on the top: (empty) SCRIPT_NAME & non-empty PATH_INFO'
    ],
    [   'web.example.org', '/help', undef,
        ['mail.example.org', '', '/help'],
        'URL prefix on the top: no PATH_INFO'
    ],
);

done_testing();

# Remove the scratch directory tree.
rmtree 't/tmp';
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment