Revision: 1179
http://www.exim.org/viewvc/pcre2?view=rev&revision=1179
Author: ph10
Date: 2019-10-16 18:12:13 +0100 (Wed, 16 Oct 2019)
Log Message:
-----------
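Fix error offset bug introduced in revision 1176: zero the error offset before matching, in case the first code unit of the subject is invalid UTF.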
Modified Paths:
--------------
code/trunk/src/pcre2_match.c
code/trunk/testdata/testinput10
code/trunk/testdata/testoutput10
Modified: code/trunk/src/pcre2_match.c
===================================================================
--- code/trunk/src/pcre2_match.c 2019-10-16 12:50:55 UTC (rev 1178)
+++ code/trunk/src/pcre2_match.c 2019-10-16 17:12:13 UTC (rev 1179)
@@ -6184,7 +6184,11 @@
}
match_data->subject = NULL;
+/* Zero the error offset in case the first code unit is invalid UTF. */
+match_data->startchar = 0;
+
+
/* ============================= JIT matching ============================== */
/* Prepare for JIT matching. Check a UTF string for validity unless no check is
Modified: code/trunk/testdata/testinput10
===================================================================
--- code/trunk/testdata/testinput10 2019-10-16 12:50:55 UTC (rev 1178)
+++ code/trunk/testdata/testinput10 2019-10-16 17:12:13 UTC (rev 1179)
@@ -579,4 +579,10 @@
/(?:\x{ff}|\x{3000})/I,utf
+/x/utf
+ abxyz
+ \x80\=startchar
+ abc\x80\=startchar
+ abc\x80\=startchar,offset=3
+
# End of testinput10
Modified: code/trunk/testdata/testoutput10
===================================================================
--- code/trunk/testdata/testoutput10 2019-10-16 12:50:55 UTC (rev 1178)
+++ code/trunk/testdata/testoutput10 2019-10-16 17:12:13 UTC (rev 1179)
@@ -1803,4 +1803,14 @@
Starting code units: \xc3 \xe3
Subject length lower bound = 1
+/x/utf
+ abxyz
+ 0: x
+ \x80\=startchar
+Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 0
+ abc\x80\=startchar
+Failed: error -22: UTF-8 error: isolated byte with 0x80 bit set at offset 3
+ abc\x80\=startchar,offset=3
+Error -36 (bad UTF-8 offset)
+
# End of testinput10