Unverified Commit d2a043dd authored by IKEDA Soji's avatar IKEDA Soji Committed by GitHub
Browse files

Merge pull request #910 from ikedas/issue-879_trial-2 by ikedas

Split request URI into SCRIPT_NAME and PATH_INFO by Sympa itself
parents 7dee23b6 c23bd70a
...@@ -42,6 +42,7 @@ check_SCRIPTS = \ ...@@ -42,6 +42,7 @@ check_SCRIPTS = \
t/Tools_SMIME.t \ t/Tools_SMIME.t \
t/Tools_Text.t \ t/Tools_Text.t \
t/Tools_Time.t \ t/Tools_Time.t \
t/WWW_Tools.t \
t/compile_executables.t \ t/compile_executables.t \
t/compile_modules.t \ t/compile_modules.t \
t/compile_scenarios.t \ t/compile_scenarios.t \
......
...@@ -38,7 +38,6 @@ use strict; ...@@ -38,7 +38,6 @@ use strict;
use lib split(/:/, $ENV{SYMPALIB} || ''), '--modulesdir--'; use lib split(/:/, $ENV{SYMPALIB} || ''), '--modulesdir--';
   
use Archive::Zip qw(); use Archive::Zip qw();
use CGI::Fast qw();
use DateTime; use DateTime;
use DateTime::Format::Mail; use DateTime::Format::Mail;
use Digest::MD5; use Digest::MD5;
...@@ -92,6 +91,7 @@ use Sympa::Tools::Text; ...@@ -92,6 +91,7 @@ use Sympa::Tools::Text;
use Sympa::Tracking; use Sympa::Tracking;
use Sympa::User; use Sympa::User;
use Sympa::WWW::Auth; use Sympa::WWW::Auth;
use Sympa::WWW::FastCGI;
use Sympa::WWW::Marc::Search; use Sympa::WWW::Marc::Search;
use Sympa::WWW::Report; use Sympa::WWW::Report;
use Sympa::WWW::Session; use Sympa::WWW::Session;
...@@ -1049,7 +1049,7 @@ $log->syslog('info', 'WWSympa started, process %d', $PID); ...@@ -1049,7 +1049,7 @@ $log->syslog('info', 'WWSympa started, process %d', $PID);
# Main loop. # Main loop.
my $loop_count = 0; my $loop_count = 0;
my $start_time = time; my $start_time = time;
while ($query = CGI::Fast->new) { while ($query = Sympa::WWW::FastCGI->new) {
$loop_count++; $loop_count++;
   
undef $param; undef $param;
...@@ -1106,17 +1106,23 @@ while ($query = CGI::Fast->new) { ...@@ -1106,17 +1106,23 @@ while ($query = CGI::Fast->new) {
## Though I don't know why, __DIE__ handler is cleared after INIT. ## Though I don't know why, __DIE__ handler is cleared after INIT.
Sympa::Crash::register_handler(); Sympa::Crash::register_handler();
   
foreach my $envvar (
qw(ORIG_PATH_INFO ORIG_SCRIPT_NAME
PATH_INFO QUERY_STRING REMOTE_ADDR REMOTE_HOST REQUEST_METHOD
SCRIPT_NAME SERVER_NAME SERVER_PORT
SYMPA_DOMAIN)
) {
$log->syslog('debug', '%s=%s', $envvar, $ENV{$envvar});
}
## Get params in a hash ## Get params in a hash
%in = $query->Vars; %in = $query->Vars;
   
# Determin robot. # Determin robot.
# N.B. As of 6.2.15, the http_host parameter will match with the host name $robot = $ENV{SYMPA_DOMAIN};
# and path locally detected by server. If remotely detected host name unless ($robot) {
# and / or path should be differ, the proxy must adjust them. # No robot providing web service found.
# N.B. As of 6.2.34, wwsympa_url parameter may be optional. print "Status: 421 Misdirected Request\n";
$robot = Sympa::WWW::Tools::get_robot('http_host', 'wwsympa_url');
unless (Conf::get_robot_conf($robot, 'wwsympa_url')) {
print "Status: 404 Not Found\n";
print "\n\n"; print "\n\n";
next; next;
} }
...@@ -1940,8 +1946,6 @@ sub _crash_handler { ...@@ -1940,8 +1946,6 @@ sub _crash_handler {
sub _split_params { sub _split_params {
my $args_string = shift; my $args_string = shift;
   
$log->syslog('debug', "PATH_INFO: %s", $ENV{'PATH_INFO'});
$args_string =~ s+^/++; $args_string =~ s+^/++;
   
my $ending_slash = 0; my $ending_slash = 0;
...@@ -3857,7 +3861,7 @@ sub do_help { ...@@ -3857,7 +3861,7 @@ sub do_help {
# Strip extensions. # Strip extensions.
$in{'help_topic'} =~ s/[.].*// if $in{'help_topic'}; $in{'help_topic'} =~ s/[.].*// if $in{'help_topic'};
# Given partial top URI, redirect to base. # Given partial top URI, redirect to base.
unless ($in{'help_topic'} or $ENV{REQUEST_URI} =~ /\/\z/) { unless ($in{'help_topic'} or ($ENV{PATH_INFO} // '') =~ m{/\z}) {
$param->{'redirect_to'} = Sympa::get_url( $param->{'redirect_to'} = Sympa::get_url(
$robot, 'help', $robot, 'help',
nomenu => $param->{'nomenu'}, nomenu => $param->{'nomenu'},
...@@ -8645,7 +8649,7 @@ sub do_arc { ...@@ -8645,7 +8649,7 @@ sub do_arc {
); );
return 1; return 1;
} }
unless ($in{'arc_file'} or $ENV{REQUEST_URI} =~ /\/\z/) { unless ($in{'arc_file'} or ($ENV{PATH_INFO} // '') =~ m{/\z}) {
$param->{'redirect_to'} = Sympa::get_url( $param->{'redirect_to'} = Sympa::get_url(
$list, 'arc', $list, 'arc',
nomenu => $param->{'nomenu'}, nomenu => $param->{'nomenu'},
......
...@@ -189,6 +189,7 @@ nobase_modules_DATA = \ ...@@ -189,6 +189,7 @@ nobase_modules_DATA = \
Sympa/Upgrade.pm \ Sympa/Upgrade.pm \
Sympa/User.pm \ Sympa/User.pm \
Sympa/WWW/Auth.pm \ Sympa/WWW/Auth.pm \
Sympa/WWW/FastCGI.pm \
Sympa/WWW/Marc.pm \ Sympa/WWW/Marc.pm \
Sympa/WWW/Marc/Search.pm \ Sympa/WWW/Marc/Search.pm \
Sympa/WWW/Report.pm \ Sympa/WWW/Report.pm \
......
# -*- indent-tabs-mode: nil; -*-
# vim:ft=perl:et:sw=4
# Sympa - SYsteme de Multi-Postage Automatique
#
# Copyright 2020 The Sympa Community. See the AUTHORS.md
# file at the top-level directory of this distribution and at
# <https://github.com/sympa-community/sympa.git>.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
package Sympa::WWW::FastCGI;
use strict;
use warnings;
use base qw(CGI::Fast);
use Sympa::WWW::Tools;
# Constructor: obtains a request object from the parent class and, when a
# request was accepted, normalizes the CGI environment for Sympa.
#
# Parameters:
#   Any arguments are handed unchanged to the parent constructor.
#
# Returns:
#   Whatever the parent constructor returned.  A false value (presumably
#   "no more requests to accept" -- see the CGI::Fast documentation) is
#   passed through as is, without touching %ENV.
#
# Side effects on %ENV, only when a request object was obtained:
#   - If Sympa::WWW::Tools::get_robot() yields a non-empty list, the current
#     SCRIPT_NAME and PATH_INFO are saved as ORIG_SCRIPT_NAME and
#     ORIG_PATH_INFO, then SYMPA_DOMAIN, SCRIPT_NAME and PATH_INFO are set
#     from that list (in this order).
#   - Otherwise SYMPA_DOMAIN is removed, so that the caller can tell that no
#     mail domain matched this request.
sub new {
    my $class = shift;
    my @args  = @_;

    my $self = $class->SUPER::new(@args);

    # Nothing was accepted: there is no request to dispatch, so do not run
    # robot detection against (and do not rewrite) a stale environment.
    return $self unless $self;

    # Determine the mail domain (a.k.a. "robot") the request is dispatched to.
    # N.B. As of 6.2.15, the http_host parameter will match with the host name
    # and path locally detected by server.  If the remotely detected host name
    # and / or path differ, the proxy must adjust them.
    # N.B. As of 6.2.34, wwsympa_url parameter may be optional.
    my @vars = Sympa::WWW::Tools::get_robot('http_host', 'wwsympa_url');
    if (@vars) {
        # Keep the values given by the HTTP server before overriding them.
        @ENV{qw(ORIG_SCRIPT_NAME ORIG_PATH_INFO)} =
            @ENV{qw(SCRIPT_NAME PATH_INFO)};
        @ENV{qw(SYMPA_DOMAIN SCRIPT_NAME PATH_INFO)} = @vars;
    } else {
        delete $ENV{SYMPA_DOMAIN};
    }

    return $self;
}
1;
__END__
=encoding utf-8
=head1 NAME
Sympa::WWW::FastCGI - CGI Interface for FastCGI of Sympa
=head1 SYNOPSIS
TBD.
=head1 DESCRIPTION
TBD.
=head1 SEE ALSO
L<CGI::Fast>.
RFC 3875, The Common Gateway Interface (CGI) Version 1.1.
L<https://tools.ietf.org/html/rfc3875>.
=head1 HISTORY
L<Sympa::WWW::FastCGI> appeared on Sympa 6.2.55b.
=cut
...@@ -33,6 +33,7 @@ use Digest::MD5; ...@@ -33,6 +33,7 @@ use Digest::MD5;
use English qw(-no_match_vars); use English qw(-no_match_vars);
use File::Path qw(); use File::Path qw();
use URI; use URI;
use URI::Escape qw();
use Sympa; use Sympa;
use Conf; use Conf;
...@@ -201,91 +202,93 @@ sub get_my_url { ...@@ -201,91 +202,93 @@ sub get_my_url {
my $robot = shift; my $robot = shift;
my %options = @_; my %options = @_;
my $original_path_info; my $path_info = $ENV{PATH_INFO} // '';
my $query_string = $ENV{QUERY_STRING} // '';
# Try getting encoded PATH_INFO and query. return
my $request_uri = $ENV{REQUEST_URI} || ''; Sympa::get_url($robot, undef, authority => $options{authority})
my $script_name = $ENV{SCRIPT_NAME} || ''; . Sympa::Tools::Text::encode_uri($path_info, omit => '/')
if ( $request_uri eq $script_name . (length $query_string ? '?' : '')
or 0 == index($request_uri, $script_name . '?') . $query_string;
or 0 == index($request_uri, $script_name . '/')) {
$original_path_info = substr($request_uri, length $script_name);
} else {
# Workaround: Encode PATH_INFO again and use it.
my $path_info = $ENV{PATH_INFO} || '';
my $query_string = $ENV{QUERY_STRING};
$original_path_info =
Sympa::Tools::Text::encode_uri($path_info, omit => '/')
. ($query_string ? ('?' . $query_string) : '');
}
return Sympa::get_url($robot, undef, authority => $options{authority})
. $original_path_info;
} }
# Determine robot. # Determine robot.
sub get_robot { sub get_robot {
my @keys = @_; my @keys = @_;
my $request_host = _get_server_name(); # Get host part of script-URI from standard CGI environment variable
my $request_path = $ENV{'REQUEST_URI'} || ''; # SERVER_NAME.
my $robot_id; # NOTE: As of 6.2.15, less trustworthy "X-Forwarded-Server:" request field
# is _no longer_ referred and this function returns only locally detected
if (defined $request_host and length $request_host) { # server name.
my $selected_path = ''; my $request_host = lc($ENV{SERVER_NAME} // '');
foreach my $rid (Sympa::List::get_robots()) { return unless length $request_host;
my $local_url; my $ipv6_re = Sympa::Regexps::ipv6();
foreach my $key (@keys) { if ($request_host =~ /\A$ipv6_re\z/) { # IPv6 address
$local_url = Conf::get_robot_conf($rid, $key); $request_host = sprintf '[%s]', $request_host;
last if $local_url; }
}
next unless $local_url; # Since CGI of some HTTP servers might split script-path and extra-path of
# script-URI improperly, we had better reconstruct them from these
if ($local_url =~ m{\A[-+\w]+:}) { # standard CGI environment variables:
; # - SCRIPT_NAME: a URI path which could identify the CGI script.
} elsif ($local_url =~ m{\A//}) { # - PATH_INFO: derived from the portion of the URI path hierarchy
$local_url = 'http:' . $local_url; # following the part that identifies the script itself.
} else { # Note that they are not URL-encoded, unlike non-standard REQUEST_URI.
$local_url = 'http://' . $local_url; my $org_script_name = $ENV{SCRIPT_NAME} // '';
} my $org_path_info = $ENV{PATH_INFO} // '';
return unless '' eq $org_script_name or 0 == index $org_script_name, '/';
return unless '' eq $org_path_info or 0 == index $org_path_info, '/';
my $request_path = $org_script_name . $org_path_info;
# Find mail domain (a.k.a. "robot") of which web URL matches script-URI.
my ($robot_id, $script_path) = (undef, undef);
foreach my $rid (Sympa::List::get_robots()) {
my $local_url;
foreach my $key (@keys) {
$local_url = Conf::get_robot_conf($rid, $key);
last if $local_url;
}
next unless $local_url;
my $uri = URI->new($local_url); if ($local_url =~ m{\A[-+\w]+:}) {
next ;
unless $uri } elsif ($local_url =~ m{\A//}) {
and $uri->scheme $local_url = 'http:' . $local_url;
and grep { $uri->scheme eq $_ } qw(http https); } else {
$local_url = 'http://' . $local_url;
my $host = lc($uri->host || '');
my $path = $uri->path || '/';
#FIXME:might need percent-decode hosts and/or paths
next
unless $request_host eq $host
and 0 == index $request_path, $path;
# The longest path wins.
($robot_id, $selected_path) = ($rid, $path)
if length $selected_path < length $path;
} }
}
return (defined $robot_id) ? $robot_id : $Conf::Conf{'domain'}; my $uri = URI->new($local_url);
} next
unless $uri
and $uri->scheme
and grep { $uri->scheme eq $_ } qw(http https);
# Old name: (part of) get_header_field() in wwsympa.fcgi. my $host = lc URI::Escape::uri_unescape($uri->host // '');
# NOTE: As of 6.2.15, less trustworthy "X-Forwarded-Server:" request field is my $path = URI::Escape::uri_unescape($uri->path // '');
# _no longer_ referred and this function returns only locally detected server next unless $request_host eq $host;
# name. next
sub _get_server_name { unless $request_path eq $path
my $server = $ENV{SERVER_NAME}; or 0 == index $request_path, $path . '/';
return undef unless defined $server and length $server;
my $ipv6_re = Sympa::Regexps::ipv6(); # The longest path wins.
if ($server =~ /\A$ipv6_re\z/) { # IPv6 address ($robot_id, $script_path) = ($rid, $path)
$server = "[$server]"; if not defined $script_path
or length $script_path < length $path;
} }
return lc $server;
return unless $robot_id;
return
wantarray
? ($robot_id, $script_path, substr $request_path, length $script_path)
: $robot_id;
} }
# Old name: (part of) get_header_field() in wwsympa.fcgi.
# No longer used.
#sub _get_server_name;
# Old name: (part of) get_header_field() in wwsympa.fcgi. # Old name: (part of) get_header_field() in wwsympa.fcgi.
# NOTE: As of 6.2.15, less trustworthy "X-Forwarded-Host:" request field is # NOTE: As of 6.2.15, less trustworthy "X-Forwarded-Host:" request field is
# _no longer_ referred and this function returns only locally detected host # _no longer_ referred and this function returns only locally detected host
......
# -*- indent-tabs-mode: nil; -*-
# vim:ft=perl:et:sw=4
use strict;
use warnings;
use English qw(-no_match_vars);
use File::Path qw(make_path rmtree);
use Test::More;
BEGIN {
    use_ok 'Sympa::WWW::Tools';
}

# Tests for Sympa::WWW::Tools::get_robot(): given the CGI environment
# variables SERVER_NAME, SCRIPT_NAME and PATH_INFO, it is expected to return
# the list (robot, script-path, path-info), or an empty list on mismatch.

%Conf::Conf = (
    domain      => 'mail.example.org',
    listmaster  => 'listmaster@example.org',
    wwsympa_url => 'http://web.example.org/sym/pa',
    etc         => 't/tmp/etc',
);

# make_path() returns only the directories it newly created, so its return
# value is false when the directory is left over from an aborted run.
# Check for the directory itself instead of the return value.
make_path $Conf::Conf{'etc'};
die sprintf 'Cannot create directory %s', $Conf::Conf{'etc'}
    unless -d $Conf::Conf{'etc'};

# Sets up the CGI environment and compares the result of get_robot() in list
# context with the expected list.
#
# Parameters:
#   $server_name, $script_name, $path_info - values for the corresponding
#       environment variables; an undefined value removes the variable.
#   $expected - array reference of the expected returned list.
#   $name     - test name.
sub check_get_robot {
    my ($server_name, $script_name, $path_info, $expected, $name) = @_;

    # Report failures at the caller's line, not inside this helper.
    local $Test::Builder::Level = $Test::Builder::Level + 1;

    my %env = (
        SERVER_NAME => $server_name,
        SCRIPT_NAME => $script_name,
        PATH_INFO   => $path_info,
    );
    foreach my $var (sort keys %env) {
        if (defined $env{$var}) {
            $ENV{$var} = $env{$var};
        } else {
            # An absent variable is modelled by really removing it.
            delete $ENV{$var};
        }
    }

    is_deeply [Sympa::WWW::Tools::get_robot('wwsympa_url')], $expected,
        $name;
}

check_get_robot(
    'web.example.org', '/sym/pa', undef,
    ['mail.example.org', '/sym/pa', ''],
    'SCRIPT_NAME & empty PATH_INFO'
);

check_get_robot(
    'web.example.org', '/sym/pa', '/help',
    ['mail.example.org', '/sym/pa', '/help'],
    'SCRIPT_NAME & non-empty PATH_INFO'
);

check_get_robot(
    'web.example.org', '/sym', '/pa/help',
    ['mail.example.org', '/sym/pa', '/help'],
    'split script-path (e.g. mod_proxy_fcgi on httpd)'
);

check_get_robot(
    'web.example.org', '/sym/pa/help', undef,
    ['mail.example.org', '/sym/pa', '/help'],
    'no PATH_INFO (e.g. nginx without fastcgi_split_path_info)'
);

check_get_robot('other.example.org', '/sym/pa', '/help', [],
    'mismatch SERVER_NAME');

check_get_robot('web.example.org', '/sympa', '/help', [],
    'mismatch SCRIPT_NAME');

# Neither variable may be a relative path.
check_get_robot('web.example.org', 'sym/pa', '/help', [],
    'dubious SCRIPT_NAME');

check_get_robot('web.example.org', '/sym/pa/', 'help', [],
    'dubious PATH_INFO');

# Web URL without path component: the script-path is empty.
$Conf::Conf{wwsympa_url} = 'http://web.example.org';

check_get_robot(
    'web.example.org', '', '/help',
    ['mail.example.org', '', '/help'],
    'URL prefix on the top: (empty) SCRIPT_NAME & non-empty PATH_INFO'
);

check_get_robot(
    'web.example.org', '/help', undef,
    ['mail.example.org', '', '/help'],
    'URL prefix on the top: no PATH_INFO'
);

done_testing();

rmtree 't/tmp';
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment