Re: Runaway sed memory use in test on older sed+glibc (was "Re: [PATCH v6 1/3] test: add helper functions for git-bundle")

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx> 于2021年5月27日周四
上午2:51写道:
>
>
> On Mon, Jan 11 2021, Jiang Xin wrote:
>
> > From: Jiang Xin <zhiyou.jx@xxxxxxxxxxxxxxx>
> >
> > Move git-bundle related functions from t5510 to a library, and this
> > lib
> > will be shared with a new testcase t6020 which finds a known
> > breakage of
> > "git-bundle".
> > [...]
> > +
> > +# Format the output of git commands to make a user-friendly and
> > stable
> > +# text.  We can easily prepare the expect text without having to
> > worry
> > +# about future changes of the commit ID and spaces of the output.
> > +make_user_friendly_and_stable_output () {
> > +     sed \
> > +             -e "s/${A%${A#???????}}[0-9a-f]*/<COMMIT-A>/g" \
> > +             -e "s/${B%${B#???????}}[0-9a-f]*/<COMMIT-B>/g" \
> > +             -e "s/${C%${C#???????}}[0-9a-f]*/<COMMIT-C>/g" \
> > +             -e "s/${D%${D#???????}}[0-9a-f]*/<COMMIT-D>/g" \
> > +             -e "s/${E%${E#???????}}[0-9a-f]*/<COMMIT-E>/g" \
> > +             -e "s/${F%${F#???????}}[0-9a-f]*/<COMMIT-F>/g" \
> > +             -e "s/${G%${G#???????}}[0-9a-f]*/<COMMIT-G>/g" \
> > +             -e "s/${H%${H#???????}}[0-9a-f]*/<COMMIT-H>/g" \
> > +             -e "s/${I%${I#???????}}[0-9a-f]*/<COMMIT-I>/g" \
> > +             -e "s/${J%${J#???????}}[0-9a-f]*/<COMMIT-J>/g" \
> > +             -e "s/${K%${K#???????}}[0-9a-f]*/<COMMIT-K>/g" \
> > +             -e "s/${L%${L#???????}}[0-9a-f]*/<COMMIT-L>/g" \
> > +             -e "s/${M%${M#???????}}[0-9a-f]*/<COMMIT-M>/g" \
> > +             -e "s/${N%${N#???????}}[0-9a-f]*/<COMMIT-N>/g" \
> > +             -e "s/${O%${O#???????}}[0-9a-f]*/<COMMIT-O>/g" \
> > +             -e "s/${P%${P#???????}}[0-9a-f]*/<COMMIT-P>/g" \
> > +             -e "s/${TAG1%${TAG1#???????}}[0-9a-f]*/<TAG-1>/g" \
> > +             -e "s/${TAG2%${TAG2#???????}}[0-9a-f]*/<TAG-2>/g" \
> > +             -e "s/${TAG3%${TAG3#???????}}[0-9a-f]*/<TAG-3>/g" \
> > +             -e "s/ *\$//"
> > +}
>
> On one of the gcc farm boxes, an i386 box (gcc45), this fails because
> sed
> gets killed after >500MB of memory use (I was just eyeballing it in
> htop) on the "create bundle from special rev: main^!" test. This is with
> GNU
> sed 4.2.2.
>
> I suspect this regex pattern creates some runaway behavior in sed
> that's
> since been fixed (or maybe it's the glibc regex engine?). The glibc is
> 2.19-18+deb8u10:
>
>     + git bundle list-heads special-rev.bdl
>     + make_user_friendly_and_stable_output
>     + sed -e s/[0-9a-f]*/<COMMIT-A>/g -e s/[0-9a-f]*/<COMMIT-B>/g -e
> s/[0-9a-f]*/<COMMIT-C>/g -e s/[0-9a-f]*/<COMMIT-D>/g -e
> s/[0-9a-f]*/<COMMIT-E>/g -e s/[0-9a-f]*/<COMMIT-F>/g -e
> s/[0-9a-f]*/<COMMIT-G>/g -e s/[0-9a-f]*/<COMMIT-H>/g -e
> s/[0-9a-f]*/<COMMIT-I>/g -e s/[0-9a-f]*/<COMMIT-J>/g -e
> s/[0-9a-f]*/<COMMIT-K>/g -e s/[0-9a-f]*/<COMMIT-L>/g -e
> s/[0-9a-f]*/<COMMIT-M>/g -e s/[0-9a-f]*/<COMMIT-N>/g -e
> s/[0-9a-f]*/<COMMIT-O>/g -e s/[0-9a-f]*/<COMMIT-P>/g -e
> s/[0-9a-f]*/<TAG-1>/g -e s/[0-9a-f]*/<TAG-2>/g -e
> s/[0-9a-f]*/<TAG-3>/g -e s/ *$//
>     sed: couldn't re-allocate memory

I wrote a program on macOS to check memory footprint for sed and perl.
See:

    https://github.com/jiangxin/compare-sed-perl

Test result:

    $ go build && ./compare-sed-perl
    Command: sed  ..., MaxRSS: 901120
    Command: gsed ..., MaxRSS: 2056192
    Command: perl ..., MaxRSS: 2269184

It seems that sed (both the builtin version on macOS and GNU sed v4.8)
consumes less memory than perl.

Can you run this program on the i386 box (gcc45) to check the memory
consumed by sed and perl?

If this issue can be resolved by replacing sed with perl, the following
patch may help:

--- >8 ---
From: Jiang Xin <zhiyou.jx@xxxxxxxxxxxxxxx>
Date: Thu, 27 May 2021 14:31:49 +0800
Subject: [PATCH] test: use perl for complex text replacement

Ævar reported that the function `make_user_friendly_and_stable_output()`
failed on an i386 box (gcc45) in the gcc farm with the error:

    sed: couldn't re-allocate memory

It seems that sed (GNU sed 4.2.2) gets killed after >500MB of memory
use on the "create bundle from special rev: main^!" test.

Call perl instead of sed for complex text replacement.

Reported-by: Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx>
Signed-off-by: Jiang Xin <zhiyou.jx@xxxxxxxxxxxxxxx>
---
 t/t5411/common-functions.sh | 27 ++++++++++++------------
 t/t5548-push-porcelain.sh   | 20 +++++++++---------
 t/t6020-bundle-misc.sh      | 42 ++++++++++++++++++-------------------
 3 files changed, 45 insertions(+), 44 deletions(-)

diff --git a/t/t5411/common-functions.sh b/t/t5411/common-functions.sh
index 6694858e18..b6d33bdfdc 100644
--- a/t/t5411/common-functions.sh
+++ b/t/t5411/common-functions.sh
@@ -39,19 +39,20 @@ create_commits_in () {
 # remove some locale error messages. The emitted human-readable errors are
 # redundant to the more machine-readable output the tests already assert.
 make_user_friendly_and_stable_output () {
-	sed \
-		-e "s/  *\$//" \
-		-e "s/  */ /g" \
-		-e "s/'/\"/g" \
-		-e "s/	/    /g" \
-		-e "s/$A/<COMMIT-A>/g" \
-		-e "s/$B/<COMMIT-B>/g" \
-		-e "s/$TAG/<TAG-v123>/g" \
-		-e "s/$ZERO_OID/<ZERO-OID>/g" \
-		-e "s/$(echo $A | cut -c1-7)[0-9a-f]*/<OID-A>/g" \
-		-e "s/$(echo $B | cut -c1-7)[0-9a-f]*/<OID-B>/g" \
-		-e "s#To $URL_PREFIX/upstream.git#To <URL/of/upstream.git>#" \
-		-e "/^error: / d"
+	perl -ne "
+		s/  *\$//;
+		s/  */ /g;
+		s/'/\"/g;
+		s/	/    /g;
+		s/$A/<COMMIT-A>/g;
+		s/$B/<COMMIT-B>/g;
+		s/$TAG/<TAG-v123>/g;
+		s/$ZERO_OID/<ZERO-OID>/g;
+		s/$(echo $A | cut -c1-7)[0-9a-f]*/<OID-A>/g;
+		s/$(echo $B | cut -c1-7)[0-9a-f]*/<OID-B>/g;
+		s#To $URL_PREFIX/upstream.git#To <URL/of/upstream.git>#;
+		next if /^error: .*$/;
+		print"
 }
 
 filter_out_user_friendly_and_stable_output () {
diff --git a/t/t5548-push-porcelain.sh b/t/t5548-push-porcelain.sh
index 5a761f3642..95e216973d 100755
--- a/t/t5548-push-porcelain.sh
+++ b/t/t5548-push-porcelain.sh
@@ -44,16 +44,16 @@ create_commits_in () {
 # without having to worry about future changes of the commit ID and spaces
 # of the output.
 make_user_friendly_and_stable_output () {
-	sed \
-		-e "s/  *\$//" \
-		-e "s/   */ /g" \
-		-e "s/	/    /g" \
-		-e "s/$A/<COMMIT-A>/g" \
-		-e "s/$B/<COMMIT-B>/g" \
-		-e "s/$ZERO_OID/<ZERO-OID>/g" \
-		-e "s/$(echo $A | cut -c1-7)[0-9a-f]*/<OID-A>/g" \
-		-e "s/$(echo $B | cut -c1-7)[0-9a-f]*/<OID-B>/g" \
-		-e "s#To $URL_PREFIX/upstream.git#To <URL/of/upstream.git>#"
+	perl -pe "
+		s/  *\$//;
+		s/   */ /g;
+		s/	/    /g;
+		s/$A/<COMMIT-A>/g;
+		s/$B/<COMMIT-B>/g;
+		s/$ZERO_OID/<ZERO-OID>/g;
+		s/$(echo $A | cut -c1-7)[0-9a-f]*/<OID-A>/g;
+		s/$(echo $B | cut -c1-7)[0-9a-f]*/<OID-B>/g;
+		s#To $URL_PREFIX/upstream.git#To <URL/of/upstream.git>#"
 }
 
 setup_upstream_and_workbench () {
diff --git a/t/t6020-bundle-misc.sh b/t/t6020-bundle-misc.sh
index 881f72fd44..f284be820f 100755
--- a/t/t6020-bundle-misc.sh
+++ b/t/t6020-bundle-misc.sh
@@ -84,27 +84,27 @@ test_commit_setvar () {
 # text.  We can easily prepare the expect text without having to worry
 # about future changes of the commit ID and spaces of the output.
 make_user_friendly_and_stable_output () {
-	sed \
-		-e "s/${A%${A#???????}}[0-9a-f]*/<COMMIT-A>/g" \
-		-e "s/${B%${B#???????}}[0-9a-f]*/<COMMIT-B>/g" \
-		-e "s/${C%${C#???????}}[0-9a-f]*/<COMMIT-C>/g" \
-		-e "s/${D%${D#???????}}[0-9a-f]*/<COMMIT-D>/g" \
-		-e "s/${E%${E#???????}}[0-9a-f]*/<COMMIT-E>/g" \
-		-e "s/${F%${F#???????}}[0-9a-f]*/<COMMIT-F>/g" \
-		-e "s/${G%${G#???????}}[0-9a-f]*/<COMMIT-G>/g" \
-		-e "s/${H%${H#???????}}[0-9a-f]*/<COMMIT-H>/g" \
-		-e "s/${I%${I#???????}}[0-9a-f]*/<COMMIT-I>/g" \
-		-e "s/${J%${J#???????}}[0-9a-f]*/<COMMIT-J>/g" \
-		-e "s/${K%${K#???????}}[0-9a-f]*/<COMMIT-K>/g" \
-		-e "s/${L%${L#???????}}[0-9a-f]*/<COMMIT-L>/g" \
-		-e "s/${M%${M#???????}}[0-9a-f]*/<COMMIT-M>/g" \
-		-e "s/${N%${N#???????}}[0-9a-f]*/<COMMIT-N>/g" \
-		-e "s/${O%${O#???????}}[0-9a-f]*/<COMMIT-O>/g" \
-		-e "s/${P%${P#???????}}[0-9a-f]*/<COMMIT-P>/g" \
-		-e "s/${TAG1%${TAG1#???????}}[0-9a-f]*/<TAG-1>/g" \
-		-e "s/${TAG2%${TAG2#???????}}[0-9a-f]*/<TAG-2>/g" \
-		-e "s/${TAG3%${TAG3#???????}}[0-9a-f]*/<TAG-3>/g" \
-		-e "s/ *\$//"
+	perl -pe "
+		s/${A%${A#???????}}[0-9a-f]*/<COMMIT-A>/g;
+		s/${B%${B#???????}}[0-9a-f]*/<COMMIT-B>/g;
+		s/${C%${C#???????}}[0-9a-f]*/<COMMIT-C>/g;
+		s/${D%${D#???????}}[0-9a-f]*/<COMMIT-D>/g;
+		s/${E%${E#???????}}[0-9a-f]*/<COMMIT-E>/g;
+		s/${F%${F#???????}}[0-9a-f]*/<COMMIT-F>/g;
+		s/${G%${G#???????}}[0-9a-f]*/<COMMIT-G>/g;
+		s/${H%${H#???????}}[0-9a-f]*/<COMMIT-H>/g;
+		s/${I%${I#???????}}[0-9a-f]*/<COMMIT-I>/g;
+		s/${J%${J#???????}}[0-9a-f]*/<COMMIT-J>/g;
+		s/${K%${K#???????}}[0-9a-f]*/<COMMIT-K>/g;
+		s/${L%${L#???????}}[0-9a-f]*/<COMMIT-L>/g;
+		s/${M%${M#???????}}[0-9a-f]*/<COMMIT-M>/g;
+		s/${N%${N#???????}}[0-9a-f]*/<COMMIT-N>/g;
+		s/${O%${O#???????}}[0-9a-f]*/<COMMIT-O>/g;
+		s/${P%${P#???????}}[0-9a-f]*/<COMMIT-P>/g;
+		s/${TAG1%${TAG1#???????}}[0-9a-f]*/<TAG-1>/g;
+		s/${TAG2%${TAG2#???????}}[0-9a-f]*/<TAG-2>/g;
+		s/${TAG3%${TAG3#???????}}[0-9a-f]*/<TAG-3>/g;
+		s/ *\$//"
 }
 
 #            (C)   (D, pull/1/head, topic/1)
-- 
2.32.0.rc0




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux