Revision: 631
http://www.exim.org/viewvc/pcre2?view=rev&revision=631
Author: ph10
Date: 2016-12-28 15:05:48 +0000 (Wed, 28 Dec 2016)
Log Message:
-----------
Add callout_error to pcre2test and stop the fuzzer after 100 callouts.
Modified Paths:
--------------
code/trunk/ChangeLog
code/trunk/doc/pcre2test.1
code/trunk/src/pcre2_fuzzsupport.c
code/trunk/src/pcre2test.c
code/trunk/testdata/testinput2
code/trunk/testdata/testoutput2
Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog 2016-12-28 12:26:12 UTC (rev 630)
+++ code/trunk/ChangeLog 2016-12-28 15:05:48 UTC (rev 631)
@@ -266,7 +266,10 @@
pattern with PCRE2_UCP set without PCRE2_UTF if a class required all wide
characters to match (for example, /[\s[:^ascii:]]/).
+40. The callout_error modifier has been added to pcre2test to make it possible
+to return PCRE2_ERROR_CALLOUT from a callout.
+
Version 10.22 29-July-2016
--------------------------
Modified: code/trunk/doc/pcre2test.1
===================================================================
--- code/trunk/doc/pcre2test.1 2016-12-28 12:26:12 UTC (rev 630)
+++ code/trunk/doc/pcre2test.1 2016-12-28 15:05:48 UTC (rev 631)
@@ -1,4 +1,4 @@
-.TH PCRE2TEST 1 "23 December 2016" "PCRE 10.23"
+.TH PCRE2TEST 1 "28 December 2016" "PCRE 10.23"
.SH NAME
pcre2test - a program for testing Perl-compatible regular expressions.
.SH SYNOPSIS
@@ -1040,6 +1040,7 @@
altglobal alternative global matching
callout_capture show captures at callout time
callout_data=<n> set a value to pass via callouts
+ callout_error=<n>[:<m>] control callout error
callout_fail=<n>[:<m>] control callout failure
callout_none do not supply a callout function
copy=<number or name> copy captured substring
@@ -1133,15 +1134,21 @@
.sp
A callout function is supplied when \fBpcre2test\fP calls the library matching
functions, unless \fBcallout_none\fP is specified. If \fBcallout_capture\fP is
-set, the current captured groups are output when a callout occurs.
+set, the current captured groups are output when a callout occurs. The default
+return from the callout function is zero, which allows matching to continue.
.P
The \fBcallout_fail\fP modifier can be given one or two numbers. If there is
-only one number, 1 is returned instead of 0 when a callout of that number is
-reached. If two numbers are given, 1 is returned when callout <n> is reached
-for the <m>th time. Note that callouts with string arguments are always given
-the number zero. See "Callouts" below for a description of the output when a
-callout it taken.
+only one number, 1 is returned instead of 0 (causing matching to backtrack)
+when a callout of that number is reached. If two numbers (<n>:<m>) are given, 1
+is returned when callout <n> is reached and there have been at least <m>
+callouts in total (of any number). The \fBcallout_error\fP modifier is similar,
+except that PCRE2_ERROR_CALLOUT is returned, causing the entire matching
+process to be aborted. If both these modifiers are set for the same callout
+number, \fBcallout_error\fP takes precedence.
.P
+Note that callouts with string arguments are always given the number zero. See
+"Callouts" below for a description of the output when a callout is taken.
+.P
The \fBcallout_data\fP modifier can be given an unsigned or a negative number.
This is set as the "user data" that is passed to the matching function, and
passed back when the callout function is invoked. Any value other than zero is
@@ -1751,6 +1758,6 @@
.rs
.sp
.nf
-Last updated: 23 December 2016
+Last updated: 28 December 2016
Copyright (c) 1997-2016 University of Cambridge.
.fi
Modified: code/trunk/src/pcre2_fuzzsupport.c
===================================================================
--- code/trunk/src/pcre2_fuzzsupport.c 2016-12-28 12:26:12 UTC (rev 630)
+++ code/trunk/src/pcre2_fuzzsupport.c 2016-12-28 15:05:48 UTC (rev 631)
@@ -32,6 +32,17 @@
PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT)
+/* This is the callout function. Its only purpose is to halt matching if there
+are more than 100 callouts, as one way of stopping too much time being spent on
+fruitless matches. The callout data is a pointer to the counter. */
+
+static int callout_function(pcre2_callout_block *cb, void *callout_data)
+{
+(void)cb; /* Avoid unused parameter warning */
+*((uint32_t *)callout_data) += 1;
+return (*((uint32_t *)callout_data) > 100)? PCRE2_ERROR_CALLOUT : 0;
+}
+
/* Putting in this apparently unnecessary prototype prevents gcc from giving a
"no previous prototype" warning when compiling at high warning level. */
@@ -77,6 +88,7 @@
for (i = 0; i < 2; i++)
{
+ uint32_t callout_count;
int errorcode;
PCRE2_SIZE erroroffset;
pcre2_code *code;
@@ -147,8 +159,9 @@
#endif
return 0;
}
- pcre2_set_match_limit(match_context, 100);
- pcre2_set_recursion_limit(match_context, 100);
+ (void)pcre2_set_match_limit(match_context, 100);
+ (void)pcre2_set_recursion_limit(match_context, 100);
+ (void)pcre2_set_callout(match_context, callout_function, &callout_count);
}
/* Match twice, with and without options */
@@ -168,6 +181,7 @@
((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
#endif
+ callout_count = 0;
errorcode = pcre2_match(code, (PCRE2_SPTR)data, (PCRE2_SIZE)size, 0,
match_options, match_data, match_context);
Modified: code/trunk/src/pcre2test.c
===================================================================
--- code/trunk/src/pcre2test.c 2016-12-28 12:26:12 UTC (rev 630)
+++ code/trunk/src/pcre2test.c 2016-12-28 15:05:48 UTC (rev 631)
@@ -175,7 +175,7 @@
#endif
#endif
-#define CFAIL_UNSET UINT32_MAX /* Unset value for cfail fields */
+#define CFORE_UNSET UINT32_MAX /* Unset value for cfail/cerror fields */
#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
#define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */
@@ -429,7 +429,7 @@
#define CTL_POSIX_NOSUB 0x00800000u
#define CTL_PUSH 0x01000000u /* These three must be */
#define CTL_PUSHCOPY 0x02000000u /* all in the same */
-#define CTL_PUSHTABLESCOPY 0x04000000u /* word. */
+#define CTL_PUSHTABLESCOPY 0x04000000u /* word. */
#define CTL_STARTCHAR 0x08000000u
#define CTL_USE_LENGTH 0x10000000u /* Same word as HEXPAT */
#define CTL_UTF8_INPUT 0x20000000u
@@ -495,6 +495,7 @@
uint32_t control; /* Must be in same position as patctl */
uint32_t control2; /* Must be in same position as patctl */
uint8_t replacement[REPLACE_MODSIZE]; /* So must this */
+ uint32_t cerror[2];
uint32_t cfail[2];
int32_t callout_data;
int32_t copy_numbers[MAXCPYGET];
@@ -549,6 +550,7 @@
{ "bsr", MOD_CTC, MOD_BSR, 0, CO(bsr_convention) },
{ "callout_capture", MOD_DAT, MOD_CTL, CTL_CALLOUT_CAPTURE, DO(control) },
{ "callout_data", MOD_DAT, MOD_INS, 0, DO(callout_data) },
+ { "callout_error", MOD_DAT, MOD_IN2, 0, DO(cerror) },
{ "callout_fail", MOD_DAT, MOD_IN2, 0, DO(cfail) },
{ "callout_info", MOD_PAT, MOD_CTL, CTL_CALLOUT_INFO, PO(control) },
{ "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
@@ -5229,7 +5231,7 @@
PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
}
else
- {
+ {
PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++],
compiled_code); }
}
@@ -5459,8 +5461,17 @@
}
}
-return (cb->callout_number != dat_datctl.cfail[0])? 0 :
- (++callout_count >= dat_datctl.cfail[1])? 1 : 0;
+callout_count++;
+
+if (cb->callout_number == dat_datctl.cerror[0] &&
+ callout_count >= dat_datctl.cerror[1])
+ return PCRE2_ERROR_CALLOUT;
+
+if (cb->callout_number == dat_datctl.cfail[0] &&
+ callout_count >= dat_datctl.cfail[1])
+ return 1;
+
+return 0;
}
@@ -6123,7 +6134,9 @@
regmatch_t *pmatch = NULL;
const char *msg = "** Ignored with POSIX interface:";
- if (dat_datctl.cfail[0] != CFAIL_UNSET || dat_datctl.cfail[1] != CFAIL_UNSET)
+ if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET)
+ prmsg(&msg, "callout_error");
+ if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET)
prmsg(&msg, "callout_fail");
if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
prmsg(&msg, "copy");
@@ -7347,7 +7360,8 @@
def_datctl.oveccount = DEFAULT_OVECCOUNT;
def_datctl.copy_numbers[0] = -1;
def_datctl.get_numbers[0] = -1;
-def_datctl.cfail[0] = def_datctl.cfail[1] = CFAIL_UNSET;
+def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
+def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;
/* Scan command line options. */
Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2 2016-12-28 12:26:12 UTC (rev 630)
+++ code/trunk/testdata/testinput2 2016-12-28 15:05:48 UTC (rev 631)
@@ -1062,8 +1062,8 @@
/(?C0)(abc(?C1))*/I
abcabcabc
- abcabc\=callout_fail=1:3
- abcabcabc\=callout_fail=1:3
+ abcabc\=callout_fail=1:4
+ abcabcabc\=callout_fail=1:4
/(\d{3}(?C))*/I
123\=callout_capture
@@ -4956,4 +4956,7 @@
/((?(?C'')\Q\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/
+/abcd/auto_callout
+ abcd\=callout_error=255:2
+
# End of testinput2
Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2 2016-12-28 12:26:12 UTC (rev 630)
+++ code/trunk/testdata/testoutput2 2016-12-28 15:05:48 UTC (rev 631)
@@ -3550,7 +3550,7 @@
1 ^ ^ )*
0: abcabcabc
1: abc
- abcabc\=callout_fail=1:3
+ abcabc\=callout_fail=1:4
--->abcabc
0 ^ (
1 ^ ^ )*
@@ -3557,7 +3557,7 @@
1 ^ ^ )*
0: abcabc
1: abc
- abcabcabc\=callout_fail=1:3
+ abcabcabc\=callout_fail=1:4
--->abcabcabc
0 ^ (
1 ^ ^ )*
@@ -15439,6 +15439,13 @@
/((?(?C'')\Q\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/
+/abcd/auto_callout
+ abcd\=callout_error=255:2
+--->abcd
+ +0 ^ a
+ +1 ^^ b
+Failed: error -37: callout error code
+
# End of testinput2
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data