Unverified Commit a58ee68d authored by IKEDA Soji's avatar IKEDA Soji Committed by GitHub
Browse files

Merge pull request #1106 from ikedas/safe_substr by ikedas

Addition to #1053: Clip subject safely
parents b8134cb4 a56aa062
......@@ -8,8 +8,8 @@
# Copyright (c) 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
# 2006, 2007, 2008, 2009, 2010, 2011 Comite Reseau des Universites
# Copyright (c) 2011, 2012, 2013, 2014, 2015, 2016, 2017 GIP RENATER
# Copyright 2017, 2018, 2019, 2020 The Sympa Community. See the AUTHORS.md
# file at the top-level directory of this distribution and at
# Copyright 2017, 2018, 2019, 2020, 2021 The Sympa Community. See the
# AUTHORS.md file at the top-level directory of this distribution and at
# <https://github.com/sympa-community/sympa.git>.
#
# This program is free software; you can redistribute it and/or modify
......@@ -6037,7 +6037,8 @@ sub _update_list_db {
my $name = $self->{'name'};
my $searchkey =
substr( Sympa::Tools::Text::foldcase($self->{'admin'}{'subject'} || ''), 0, 255);
Sympa::Tools::Text::clip(
Sympa::Tools::Text::foldcase($self->{'admin'}{'subject'} // ''), 255);
my $status = $self->{'admin'}{'status'};
my $robot = $self->{'domain'};
......
......@@ -484,6 +484,60 @@ sub qencode_filename {
return $filename;
}
# Truncates a text so that its UTF-8 encoded form fits into a byte limit,
# cutting only at grapheme cluster boundaries.
#
# Arguments:
#   1st - the text: a Unicode::GCString object, a Unicode (UTF-8 flagged)
#         string, or a bytestring, which is decoded as UTF-8.
#   2nd - the limit in bytes.  A negative value is first added to the byte
#         length of the text.
#
# Returns:
#   undef if no limit was given; '' if a negative limit is still negative
#   after the byte length was added; the text as is if it already fits;
#   otherwise the clipped text in the same form as the input (object,
#   Unicode string or bytestring).
sub clip {
    my ($text, @rest) = @_;
    return undef unless @rest;
    my $limit = $rest[0];

    # Classify the input once; the same classification decides both how
    # the text is measured and how the result is handed back.
    my $is_gcstring = (ref($text) eq 'Unicode::GCString');
    my $is_unicode  = !$is_gcstring && Encode::is_utf8($text);

    my $clusters =
          $is_gcstring ? $text
        : $is_unicode  ? Unicode::GCString->new($text)
        :   Unicode::GCString->new(Encode::decode_utf8($text));

    # Size of the text in bytes when encoded as UTF-8.
    my $octet_count = length(
          $is_gcstring ? Encode::encode_utf8($text->as_string)
        : $is_unicode  ? Encode::encode_utf8($text)
        :                $text
    );

    if ($limit < 0) {
        $limit += $octet_count;
        return '' if $limit < 0;    # out of range
    }
    return $text if $octet_count <= $limit;

    # Number of leading grapheme clusters that fit into the limit.
    my $keep    = _gc_length($clusters, $limit);
    my $clipped = $clusters->substr(0, $keep);

    return $clipped if $is_gcstring;
    my $chars = $clipped->as_string;
    return $chars if $is_unicode;
    return Encode::encode_utf8($chars);
}
# Finds how many leading grapheme clusters of a string fit into a byte limit.
#
# Arguments:
#   1st - an object providing length(), substr() and as_string()
#         (clip() passes a Unicode::GCString).
#   2nd - the limit in bytes of the UTF-8 encoded prefix.
#
# Returns the largest count of leading clusters whose UTF-8 encoding is
# not longer than the limit; 0 if the string is empty or the limit is false.
sub _gc_length {
    my ($clusters, $max_octets) = @_;

    my $total = $clusters->length;
    return 0 unless $total;
    return 0 unless $max_octets;

    # Binary search: $lo clusters are known to fit, more than $hi cannot.
    my ($lo, $hi) = (0, $total);
    until ($lo >= $hi) {
        # Round the midpoint upward so that the range always shrinks.
        my $mid    = ($lo + $hi + 1) >> 1;
        my $prefix = $clusters->substr(0, $mid)->as_string;

        if (length(Encode::encode_utf8($prefix)) <= $max_octets) {
            $lo = $mid;
        } else {
            $hi = $mid - 1;
        }
    }

    return $lo;
}
# Old name: tools::unescape_chars().
sub unescape_chars {
my $s = shift;
......@@ -782,6 +836,12 @@ Parameters:
E-mail address.
=item clip ( $string, $length )
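Clips $string so that its UTF-8 encoded form is at most $length bytes long,
without breaking a grapheme cluster.
If $length is negative, that many bytes are left off the end instead.
If $string is a bytestring, it is assumed to be encoded in UTF-8.
The result has the same form as $string
(Unicode::GCString object, Unicode string or bytestring).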
=item decode_html =E<gt> 1
If set, arguments are assumed to include HTML entities.
......@@ -937,4 +997,6 @@ guessed_to_utf8() and pad() were added on Sympa 6.2.17.
canonic_text() and slurp() were added on Sympa 6.2.53b.
clip() was added on Sympa 6.2.61b.
=cut
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment