Revision: 326
http://www.exim.org/viewvc/pcre2?view=rev&revision=326
Author: ph10
Date: 2015-07-24 14:30:50 +0100 (Fri, 24 Jul 2015)
Log Message:
-----------
Fix pedantic infelicities shown up by clang and a UTF-8 checking overflow bug.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/RunTest
code/trunk/src/pcre2_compile.c
code/trunk/src/pcre2_dfa_match.c
code/trunk/src/pcre2_match.c
code/trunk/src/pcre2_string_utils.c
code/trunk/src/pcre2_valid_utf.c
code/trunk/src/pcre2test.c
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2015-07-22 14:34:31 UTC (rev 325)
+++ code/trunk/ChangeLog 2015-07-24 13:30:50 UTC (rev 326)
@@ -77,7 +77,11 @@
argument might be incorrectly processed, especially if the string contained \Q.
This bug was discovered by Karl Skomski with the LLVM fuzzer.
+21. Compiling PCRE2 with the sanitize options of clang showed up a number of
+very pedantic coding infelicities and a buffer overflow while checking a UTF-8
+string if the final multi-byte UTF-8 character was truncated.
+
Version 10.20 30-June-2015
--------------------------
Modified: code/trunk/RunTest
===================================================================
--- code/trunk/RunTest 2015-07-22 14:34:31 UTC (rev 325)
+++ code/trunk/RunTest 2015-07-24 13:30:50 UTC (rev 326)
@@ -33,6 +33,10 @@
# For backwards compatibility, -nojit, -valgrind, -valgrind-log, and -sim may
# be given without the leading "-" character.
#
+# When PCRE2 is compiled by clang with -fsanitize arguments, some tests need
+# very much more stack than normal. In environments where the stack can be
+# set at runtime, -bigstack sets a gigantic stack.
+#
# There are two special cases where only one argument is allowed:
#
# If the first and only argument is "ebcdic", the script runs the special
@@ -184,6 +188,7 @@
arg16=
arg32=
nojit=
+bigstack=
sim=
skip=
valgrind=
@@ -240,6 +245,7 @@
-8) arg8=yes;;
-16) arg16=yes;;
-32) arg32=yes;;
+ bigstack|-bigstack) bigstack=yes;;
nojit|-nojit) nojit=yes;;
sim|-sim) shift; sim=$1;;
valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all";;
@@ -287,13 +293,22 @@
# If it is possible to set the system stack size, arrange to set a value for
# test 2, which needs more than even the Linux default when PCRE2 has been
-# compiled with -fsanitize=address.
+# compiled by gcc with -fsanitize=address. When the compiler is clang, sanitize
+# options require an even bigger stack for test 2, and an increased stack for
+# some of the other tests.
$sim ./pcre2test -S 1 /dev/null /dev/null
if [ $? -eq 0 ] ; then
- test2stack="-S 16"
+ if [ "$bigstack" = "" ] ; then
+ test2stack="-S 16"
+ defaultstack=""
+ else
+ test2stack="-S 1024"
+ defaultstack="-S 64"
+ fi
else
test2stack=""
+ defaultstack=""
fi
# All of 8-bit, 16-bit, and 32-bit character strings may be supported, but only
@@ -438,7 +453,7 @@
if [ $do1 = yes ] ; then
echo $title1
for opt in "" $jitopt; do
- $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput1 testtry
+ $sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput1 testtry
checkresult $? 1 "$opt"
done
fi
@@ -508,7 +523,7 @@
if [ "$locale" != "" ] ; then
echo $title3 "(using '$locale' locale)"
for opt in "" $jitopt; do
- $sim $valgrind ./pcre2test -q $bmode $opt $infile testtry
+ $sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $infile testtry
if [ $? = 0 ] ; then
case "$opt" in
-jit) with=" with JIT";;
@@ -545,7 +560,7 @@
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
- $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput4 testtry
+ $sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput4 testtry
checkresult $? 4 "$opt"
done
fi
@@ -557,7 +572,7 @@
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
- $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput5 testtry
+ $sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput5 testtry
checkresult $? 5 "$opt"
done
fi
@@ -567,7 +582,7 @@
if [ $do6 = yes ] ; then
echo $title6
- $sim $valgrind ./pcre2test -q $bmode $testdata/testinput6 testtry
+ $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput6 testtry
checkresult $? 6 ""
fi
@@ -576,7 +591,7 @@
if [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
- $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput7 testtry
+ $sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput7 testtry
checkresult $? 7 ""
fi
fi
@@ -596,7 +611,7 @@
elif [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
- $sim $valgrind ./pcre2test -q $bmode $testdata/testinput8 testtry
+ $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput8 testtry
checkresult $? 8-$bits ""
fi
fi
@@ -609,7 +624,7 @@
echo " Skipped when running 16/32-bit tests"
else
for opt in "" $jitopt; do
- $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput9 testtry
+ $sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput9 testtry
checkresult $? 9 "$opt"
done
fi
@@ -625,7 +640,7 @@
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
- $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput10 testtry
+ $sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput10 testtry
checkresult $? 10 "$opt"
done
fi
@@ -639,7 +654,7 @@
echo " Skipped when running 8-bit tests"
else
for opt in "" $jitopt; do
- $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput11 testtry
+ $sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput11 testtry
checkresult $? 11-$bits "$opt"
done
fi
@@ -656,7 +671,7 @@
echo " Skipped because UTF-$bits support is not available"
else
for opt in "" $jitopt; do
- $sim $valgrind ./pcre2test -q $bmode $opt $testdata/testinput12 testtry
+ $sim $valgrind ./pcre2test -q $defaultstack $bmode $opt $testdata/testinput12 testtry
checkresult $? 12-$bits "$opt"
done
fi
@@ -669,7 +684,7 @@
if [ "$bits" = "8" ] ; then
echo " Skipped when running 8-bit tests"
else
- $sim $valgrind ./pcre2test -q $bmode $testdata/testinput13 testtry
+ $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput13 testtry
checkresult $? 13 ""
fi
fi
@@ -678,7 +693,7 @@
if [ $do14 = yes ] ; then
echo $title14
- $sim $valgrind ./pcre2test -q $bmode $testdata/testinput14 testtry
+ $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput14 testtry
checkresult $? 14 ""
fi
@@ -689,7 +704,7 @@
if [ $jit -ne 0 ] ; then
echo " Skipped because JIT is available"
else
- $sim $valgrind ./pcre2test -q $bmode $testdata/testinput15 testtry
+ $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput15 testtry
checkresult $? 15 ""
fi
fi
@@ -701,7 +716,7 @@
if [ $jit -eq 0 -o "$nojit" = "yes" ] ; then
echo " Skipped because JIT is not available or nojit was specified"
else
- $sim $valgrind ./pcre2test -q $bmode $testdata/testinput16 testtry
+ $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput16 testtry
checkresult $? 16 ""
fi
fi
@@ -713,7 +728,7 @@
if [ "$bits" = "16" -o "$bits" = "32" ] ; then
echo " Skipped when running 16/32-bit tests"
else
- $sim $valgrind ./pcre2test -q $bmode $testdata/testinput17 testtry
+ $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput17 testtry
checkresult $? 17 ""
fi
fi
@@ -727,7 +742,7 @@
elif [ $utf -eq 0 ] ; then
echo " Skipped because UTF-$bits support is not available"
else
- $sim $valgrind ./pcre2test -q $bmode $testdata/testinput18 testtry
+ $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput18 testtry
checkresult $? 18 ""
fi
fi
@@ -736,7 +751,7 @@
if [ $do19 = yes ] ; then
echo $title19
- $sim $valgrind ./pcre2test -q $bmode $testdata/testinput19 testtry
+ $sim $valgrind ./pcre2test -q $defaultstack $bmode $testdata/testinput19 testtry
checkresult $? 19 ""
fi
Modified: code/trunk/src/pcre2_compile.c
===================================================================
--- code/trunk/src/pcre2_compile.c 2015-07-22 14:34:31 UTC (rev 325)
+++ code/trunk/src/pcre2_compile.c 2015-07-24 13:30:50 UTC (rev 326)
@@ -270,7 +270,7 @@
#ifndef EBCDIC
#define ESCAPES_FIRST CHAR_0
#define ESCAPES_LAST CHAR_z
-#define ESCAPES_UPPER_CASE (-32) /* Add this to upper case a letter */
+#define UPPER_CASE(c) (c-32)
static const short int escapes[] = {
0, 0,
@@ -323,11 +323,11 @@
#if 'a' == 0x81 /* Check for a real EBCDIC environment */
#define ESCAPES_FIRST CHAR_a
#define ESCAPES_LAST CHAR_9
-#define ESCAPES_UPPER_CASE (+64) /* Add this to upper case a letter */
+#define UPPER_CASE(c) (c+64)
#else /* Testing in an ASCII environment */
#define ESCAPES_FIRST ((unsigned char)'\x81') /* EBCDIC 'a' */
#define ESCAPES_LAST ((unsigned char)'\xf9') /* EBCDIC '9' */
-#define ESCAPES_UPPER_CASE (-32) /* Add this to upper case a letter */
+#define UPPER_CASE(c) (c-32)
#endif
static const short int escapes[] = {
@@ -1884,7 +1884,7 @@
s = cb->bracount - (s - 1);
}
- escape = -s;
+ escape = -(int)s;
break;
/* The handling of escape sequences consisting of a string of digits
@@ -1909,7 +1909,7 @@
{
oldptr = ptr;
/* The integer range is limited by the machine's int representation. */
- s = (int)(c - CHAR_0);
+ s = c - CHAR_0;
overflow = FALSE;
while (IS_DIGIT(ptr[1]))
{
@@ -1933,7 +1933,7 @@
if (s < 10 || *oldptr >= CHAR_8 || s <= cb->bracount)
{
- escape = -s; /* Indicates a back reference */
+ escape = -(int)s; /* Indicates a back reference */
break;
}
ptr = oldptr; /* Put the pointer back and fall through */
@@ -1981,7 +1981,7 @@
#if PCRE2_CODE_UNIT_WIDTH == 32
if (c >= 0x20000000l) { overflow = TRUE; break; }
#endif
- c = (c << 3) + cc - CHAR_0 ;
+ c = (c << 3) + (cc - CHAR_0);
#if PCRE2_CODE_UNIT_WIDTH == 8
if (c > (utf ? 0x10ffffU : 0xffU)) { overflow = TRUE; break; }
#elif PCRE2_CODE_UNIT_WIDTH == 16
@@ -2105,7 +2105,7 @@
#endif
c = *(++ptr);
- if (c >= CHAR_a && c <= CHAR_z) c += ESCAPES_UPPER_CASE;
+ if (c >= CHAR_a && c <= CHAR_z) c = UPPER_CASE(c);
if (c == CHAR_NULL && ptr >= cb->end_pattern)
{
*errorcodeptr = ERR2;
@@ -3532,7 +3532,7 @@
if (top_nest == (nest_save *)(cb->start_workspace)) top_nest = NULL;
else top_nest--;
}
- nest_depth--;
+ if (nest_depth > 0) nest_depth--; /* Can be 0 for unmatched ) */
break;
}
}
@@ -3938,7 +3938,8 @@
if (PRIV(strncmp_c8)(ptr+1, STRING_WEIRD_STARTWORD, 6) == 0)
{
nestptr = ptr + 7;
- ptr = sub_start_of_word - 1;
+ ptr = sub_start_of_word; /* Do not combine these statements; clang's */
+ ptr--; /* sanitizer moans about a negative index. */
continue;
}
@@ -3945,7 +3946,8 @@
if (PRIV(strncmp_c8)(ptr+1, STRING_WEIRD_ENDWORD, 6) == 0)
{
nestptr = ptr + 7;
- ptr = sub_end_of_word - 1;
+ ptr = sub_end_of_word; /* Do not combine these statements; clang's */
+ ptr--; /* sanitizer moans about a negative index. */
continue;
}
@@ -5960,7 +5962,7 @@
goto FAILED;
}
if (refsign != 0) recno = (refsign == CHAR_MINUS)?
- cb->bracount - recno + 1 : recno + cb->bracount;
+ (cb->bracount + 1) - recno : recno + cb->bracount;
if (recno <= 0 || (uint32_t)recno > cb->final_bracount)
{
*errorcodeptr = ERR15;
@@ -6490,7 +6492,7 @@
*errorcodeptr = ERR58;
goto FAILED;
}
- recno = cb->bracount - recno + 1;
+ recno = (int)(cb->bracount + 1) - recno;
if (recno <= 0)
{
*errorcodeptr = ERR15;
@@ -8183,7 +8185,7 @@
while (IS_DIGIT(ptr[pp]))
{
if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */
- c = c*10 + ptr[pp++] - CHAR_0;
+ c = c*10 + (ptr[pp++] - CHAR_0);
}
if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS)
{
Modified: code/trunk/src/pcre2_dfa_match.c
===================================================================
--- code/trunk/src/pcre2_dfa_match.c 2015-07-22 14:34:31 UTC (rev 325)
+++ code/trunk/src/pcre2_dfa_match.c 2015-07-24 13:30:50 UTC (rev 326)
@@ -3172,7 +3172,7 @@
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
-options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
+options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
#undef FF
#undef OO
Modified: code/trunk/src/pcre2_match.c
===================================================================
--- code/trunk/src/pcre2_match.c 2015-07-22 14:34:31 UTC (rev 325)
+++ code/trunk/src/pcre2_match.c 2015-07-24 13:30:50 UTC (rev 326)
@@ -194,7 +194,7 @@
GETCHARINC(c, eptr);
GETCHARINC(d, p);
ur = GET_UCD(d);
- if (c != d && c != d + ur->other_case)
+ if (c != d && c != (uint32_t)((int)d + ur->other_case))
{
const uint32_t *pp = PRIV(ucd_caseless_sets) + ur->caseset;
for (;;)
@@ -211,7 +211,7 @@
/* Not in UTF mode */
{
- while (length-- > 0)
+ for (; length > 0; length--)
{
uint32_t cc, cp;
if (eptr >= mb->end_subject) return 1; /* Partial match */
@@ -226,11 +226,11 @@
}
/* In the caseful case, we can just compare the code units, whether or not we
-are in UT mode. */
+are in UTF mode. */
else
{
- while (length-- > 0)
+ for (; length > 0; length--)
{
if (eptr >= mb->end_subject) return 1; /* Partial match */
if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1; /*No match */
@@ -3342,7 +3342,10 @@
CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
RRETURN(MATCH_NOMATCH);
}
- while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
+ for (; length > 0; length--)
+ {
+ if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
+ }
}
else
#endif
@@ -6513,7 +6516,7 @@
#define FF (PCRE2_NOTEMPTY_SET|PCRE2_NE_ATST_SET)
#define OO (PCRE2_NOTEMPTY|PCRE2_NOTEMPTY_ATSTART)
-options |= (re->flags & FF) / ((FF & -FF) / (OO & -OO));
+options |= (re->flags & FF) / ((FF & (~FF+1)) / (OO & (~OO+1)));
#undef FF
#undef OO
@@ -6783,7 +6786,7 @@
end_subject = t;
}
- /* Advance to a unique first code unit if there is one. In 8-bit mode, the
+ /* Advance to a unique first code unit if there is one. In 8-bit mode, the
use of memchr() gives a big speed up. */
if (has_first_cu)
@@ -6801,8 +6804,8 @@
#else
start_match = memchr(start_match, first_cu, end_subject - start_match);
if (start_match == NULL) start_match = end_subject;
-#endif
- }
+#endif
+ }
}
/* Or to just after a linebreak for a multiline match */
Modified: code/trunk/src/pcre2_string_utils.c
===================================================================
--- code/trunk/src/pcre2_string_utils.c 2015-07-22 14:34:31 UTC (rev 325)
+++ code/trunk/src/pcre2_string_utils.c 2015-07-24 13:30:50 UTC (rev 326)
@@ -121,7 +121,7 @@
PRIV(strncmp)(PCRE2_SPTR str1, PCRE2_SPTR str2, size_t len)
{
PCRE2_UCHAR c1, c2;
-while (len-- > 0)
+for (; len > 0; len--)
{
c1 = *str1++;
c2 = *str2++;
@@ -150,7 +150,7 @@
PRIV(strncmp_c8)(PCRE2_SPTR str1, const char *str2, size_t len)
{
PCRE2_UCHAR c1, c2;
-while (len-- > 0)
+for (; len > 0; len--)
{
c1 = *str1++;
c2 = *str2++;
Modified: code/trunk/src/pcre2_valid_utf.c
===================================================================
--- code/trunk/src/pcre2_valid_utf.c 2015-07-22 14:34:31 UTC (rev 325)
+++ code/trunk/src/pcre2_valid_utf.c 2015-07-24 13:30:50 UTC (rev 326)
@@ -131,11 +131,13 @@
PCRE2_ERROR_UTF8_ERR21 Byte with the illegal value 0xfe or 0xff
*/
-for (p = string; length-- > 0; p++)
+for (p = string; length > 0; p++)
{
register uint32_t ab, d;
c = *p;
+ length--;
+
if (c < 128) continue; /* ASCII character */
if (c < 0xc0) /* Isolated 10xx xxxx byte */
@@ -324,9 +326,10 @@
PCRE2_ERROR_UTF16_ERR3 Isolated low surrogate
*/
-for (p = string; length-- > 0; p++)
+for (p = string; length > 0; p++)
{
c = *p;
+ length--;
if ((c & 0xf800) != 0xd800)
{
@@ -368,7 +371,7 @@
PCRE2_ERROR_UTF32_ERR2 Character > 0x10ffff
*/
-for (p = string; length-- > 0; p++)
+for (p = string; length > 0; length--, p++)
{
c = *p;
if ((c & 0xfffff800u) != 0xd800u)
Modified: code/trunk/src/pcre2test.c
===================================================================
--- code/trunk/src/pcre2test.c 2015-07-22 14:34:31 UTC (rev 325)
+++ code/trunk/src/pcre2test.c 2015-07-24 13:30:50 UTC (rev 326)
@@ -2606,7 +2606,7 @@
pp = pbuffer16;
if (!utf)
{
- while (len-- > 0) *pp++ = *p++;
+ for (; len > 0; len--) *pp++ = *p++;
}
else while (len > 0)
{
@@ -2683,7 +2683,7 @@
pp = pbuffer32;
if (!utf)
{
- while (len-- > 0) *pp++ = *p++;
+ for (; len > 0; len--) *pp++ = *p++;
}
else while (len > 0)
{
@@ -2723,10 +2723,9 @@
static PCRE2_SIZE
backchars(uint8_t *subject, PCRE2_SIZE offset, uint32_t count, BOOL utf)
{
-long int yield;
+if (!utf || test_mode == PCRE32_MODE)
+ return (count >= offset)? 0 : (offset - count);
-if (!utf || test_mode == PCRE32_MODE) yield = offset - count;
-
else if (test_mode == PCRE8_MODE)
{
PCRE2_SPTR8 pp = (PCRE2_SPTR8)subject + offset;
@@ -2735,7 +2734,7 @@
pp--;
while ((*pp & 0xc0) == 0x80) pp--;
}
- yield = pp - (PCRE2_SPTR8)subject;
+ return pp - (PCRE2_SPTR8)subject;
}
else /* 16-bit mode */
@@ -2746,10 +2745,8 @@
pp--;
if ((*pp & 0xfc00) == 0xdc00) pp--;
}
- yield = pp - (PCRE2_SPTR16)subject;
+ return pp - (PCRE2_SPTR16)subject;
}
-
-return (yield >= 0)? yield : 0;
}
@@ -2936,7 +2933,7 @@
if (c == 0)
{
if (len == mlen) return mid;
- c = len - mlen;
+ c = (int)len - (int)mlen;
}
if (c > 0) bot = mid + 1; else top = mid;
}
@@ -3712,7 +3709,7 @@
if (namecount > 0)
{
fprintf(outfile, "Named capturing subpatterns:\n");
- while (namecount-- > 0)
+ for (; namecount > 0; namecount--)
{
int imm2_size = test_mode == PCRE8_MODE ? 2 : 1;
uint32_t length = (uint32_t)STRLEN(nametable + imm2_size);
@@ -5378,7 +5375,7 @@
/* Check for mutually exclusive modifiers. */
c = dat_datctl.control & EXCLUSIVE_DAT_CONTROLS;
-if (c - (c & -c) != 0)
+if (c != 0 && c != (c & (~c+1)))
{
show_controls(c, "** Not allowed together:");
fprintf(outfile, "\n");