[Pcre-svn] [631] code/trunk: Add callout_error to pcre2test …

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [631] code/trunk: Add callout_error to pcre2test and stop the fuzzer after 100 callouts.
Revision: 631
          http://www.exim.org/viewvc/pcre2?view=rev&revision=631
Author:   ph10
Date:     2016-12-28 15:05:48 +0000 (Wed, 28 Dec 2016)
Log Message:
-----------
Add callout_error to pcre2test and stop the fuzzer after 100 callouts.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/doc/pcre2test.1
    code/trunk/src/pcre2_fuzzsupport.c
    code/trunk/src/pcre2test.c
    code/trunk/testdata/testinput2
    code/trunk/testdata/testoutput2


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2016-12-28 12:26:12 UTC (rev 630)
+++ code/trunk/ChangeLog    2016-12-28 15:05:48 UTC (rev 631)
@@ -266,7 +266,10 @@
 pattern with PCRE2_UCP set without PCRE2_UTF if a class required all wide
 characters to match (for example, /[\s[:^ascii:]]/).


+40. The callout_error modifier has been added to pcre2test to make it possible
+to return PCRE2_ERROR_CALLOUT from a callout.

+
Version 10.22 29-July-2016
--------------------------


Modified: code/trunk/doc/pcre2test.1
===================================================================
--- code/trunk/doc/pcre2test.1    2016-12-28 12:26:12 UTC (rev 630)
+++ code/trunk/doc/pcre2test.1    2016-12-28 15:05:48 UTC (rev 631)
@@ -1,4 +1,4 @@
-.TH PCRE2TEST 1 "23 December 2016" "PCRE 10.23"
+.TH PCRE2TEST 1 "28 December 2016" "PCRE 10.23"
 .SH NAME
 pcre2test - a program for testing Perl-compatible regular expressions.
 .SH SYNOPSIS
@@ -1040,6 +1040,7 @@
       altglobal                  alternative global matching
       callout_capture            show captures at callout time
       callout_data=<n>           set a value to pass via callouts
+      callout_error=<n>[:<m>]    control callout error
       callout_fail=<n>[:<m>]     control callout failure
       callout_none               do not supply a callout function
       copy=<number or name>      copy captured substring
@@ -1133,15 +1134,21 @@
 .sp
 A callout function is supplied when \fBpcre2test\fP calls the library matching
 functions, unless \fBcallout_none\fP is specified. If \fBcallout_capture\fP is
-set, the current captured groups are output when a callout occurs.
+set, the current captured groups are output when a callout occurs. The default 
+return from the callout function is zero, which allows matching to continue.
 .P
 The \fBcallout_fail\fP modifier can be given one or two numbers. If there is
-only one number, 1 is returned instead of 0 when a callout of that number is
-reached. If two numbers are given, 1 is returned when callout <n> is reached
-for the <m>th time. Note that callouts with string arguments are always given
-the number zero. See "Callouts" below for a description of the output when a
-callout it taken.
+only one number, 1 is returned instead of 0 (causing matching to backtrack)
+when a callout of that number is reached. If two numbers (<n>:<m>) are given, 1
+is returned when callout <n> is reached and there have been at least <m>
+callouts of any number. The \fBcallout_error\fP modifier is similar, except that
+PCRE2_ERROR_CALLOUT is returned, causing the entire matching process to be 
+aborted. If both these modifiers are set for the same callout number,
+\fBcallout_error\fP takes precedence.
 .P
+Note that callouts with string arguments are always given the number zero. See
+"Callouts" below for a description of the output when a callout is taken.
+.P
 The \fBcallout_data\fP modifier can be given an unsigned or a negative number.
 This is set as the "user data" that is passed to the matching function, and
 passed back when the callout function is invoked. Any value other than zero is
@@ -1751,6 +1758,6 @@
 .rs
 .sp
 .nf
-Last updated: 23 December 2016
+Last updated: 28 December 2016
 Copyright (c) 1997-2016 University of Cambridge.
 .fi


Modified: code/trunk/src/pcre2_fuzzsupport.c
===================================================================
--- code/trunk/src/pcre2_fuzzsupport.c    2016-12-28 12:26:12 UTC (rev 630)
+++ code/trunk/src/pcre2_fuzzsupport.c    2016-12-28 15:05:48 UTC (rev 631)
@@ -32,6 +32,17 @@
    PCRE2_NOTEMPTY_ATSTART|PCRE2_PARTIAL_HARD| \
    PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT)


+/* This is the callout function. Its only purpose is to halt matching if there
+are more than 100 callouts, as one way of stopping too much time being spent on
+fruitless matches. The callout data is a pointer to the counter. */
+
+static int callout_function(pcre2_callout_block *cb, void *callout_data)
+{
+(void)cb; /* Avoid unused parameter warning */
+*((uint32_t *)callout_data) += 1;
+return (*((uint32_t *)callout_data) > 100)? PCRE2_ERROR_CALLOUT : 0;
+}
+
/* Putting in this apparently unnecessary prototype prevents gcc from giving a
"no previous prototype" warning when compiling at high warning level. */

@@ -77,6 +88,7 @@

 for (i = 0; i < 2; i++)
   {
+  uint32_t callout_count;
   int errorcode;
   PCRE2_SIZE erroroffset;
   pcre2_code *code;
@@ -147,8 +159,9 @@
 #endif
         return 0;
         }
-      pcre2_set_match_limit(match_context, 100);
-      pcre2_set_recursion_limit(match_context, 100); 
+      (void)pcre2_set_match_limit(match_context, 100);
+      (void)pcre2_set_recursion_limit(match_context, 100); 
+      (void)pcre2_set_callout(match_context, callout_function, &callout_count); 
       }


     /* Match twice, with and without options */
@@ -168,6 +181,7 @@
         ((match_options & PCRE2_PARTIAL_SOFT) != 0)? ",partial_soft" : "");
 #endif


+      callout_count = 0;
       errorcode = pcre2_match(code, (PCRE2_SPTR)data, (PCRE2_SIZE)size, 0,
         match_options, match_data, match_context);



Modified: code/trunk/src/pcre2test.c
===================================================================
--- code/trunk/src/pcre2test.c    2016-12-28 12:26:12 UTC (rev 630)
+++ code/trunk/src/pcre2test.c    2016-12-28 15:05:48 UTC (rev 631)
@@ -175,7 +175,7 @@
 #endif
 #endif


-#define CFAIL_UNSET UINT32_MAX  /* Unset value for cfail fields */
+#define CFORE_UNSET UINT32_MAX  /* Unset value for cfail/cerror fields */
 #define DFA_WS_DIMENSION 1000   /* Size of DFA workspace */
 #define DEFAULT_OVECCOUNT 15    /* Default ovector count */
 #define JUNK_OFFSET 0xdeadbeef  /* For initializing ovector */
@@ -429,7 +429,7 @@
 #define CTL_POSIX_NOSUB                  0x00800000u
 #define CTL_PUSH                         0x01000000u  /* These three must be */
 #define CTL_PUSHCOPY                     0x02000000u  /*   all in the same */
-#define CTL_PUSHTABLESCOPY               0x04000000u  /*     word. */          
+#define CTL_PUSHTABLESCOPY               0x04000000u  /*     word. */
 #define CTL_STARTCHAR                    0x08000000u
 #define CTL_USE_LENGTH                   0x10000000u  /* Same word as HEXPAT */
 #define CTL_UTF8_INPUT                   0x20000000u
@@ -495,6 +495,7 @@
   uint32_t  control;       /* Must be in same position as patctl */
   uint32_t  control2;      /* Must be in same position as patctl */
    uint8_t  replacement[REPLACE_MODSIZE];  /* So must this */
+  uint32_t  cerror[2];
   uint32_t  cfail[2];
    int32_t  callout_data;
    int32_t  copy_numbers[MAXCPYGET];
@@ -549,6 +550,7 @@
   { "bsr",                        MOD_CTC,  MOD_BSR, 0,                          CO(bsr_convention) },
   { "callout_capture",            MOD_DAT,  MOD_CTL, CTL_CALLOUT_CAPTURE,        DO(control) },
   { "callout_data",               MOD_DAT,  MOD_INS, 0,                          DO(callout_data) },
+  { "callout_error",              MOD_DAT,  MOD_IN2, 0,                          DO(cerror) },
   { "callout_fail",               MOD_DAT,  MOD_IN2, 0,                          DO(cfail) },
   { "callout_info",               MOD_PAT,  MOD_CTL, CTL_CALLOUT_INFO,           PO(control) },
   { "callout_none",               MOD_DAT,  MOD_CTL, CTL_CALLOUT_NONE,           DO(control) },
@@ -5229,7 +5231,7 @@
     PCRE2_CODE_COPY_TO_VOID(patstack[patstacknext++], compiled_code);
     }
   else
-    {     
+    {
     PCRE2_CODE_COPY_WITH_TABLES_TO_VOID(patstack[patstacknext++],
       compiled_code); }
   }
@@ -5459,8 +5461,17 @@
     }
   }


-return (cb->callout_number != dat_datctl.cfail[0])? 0 :
-       (++callout_count >= dat_datctl.cfail[1])? 1 : 0;
+callout_count++;
+
+if (cb->callout_number == dat_datctl.cerror[0] &&
+    callout_count >= dat_datctl.cerror[1])
+  return PCRE2_ERROR_CALLOUT;
+
+if (cb->callout_number == dat_datctl.cfail[0] &&
+    callout_count >= dat_datctl.cfail[1])
+  return 1;
+
+return 0;
 }



@@ -6123,7 +6134,9 @@
regmatch_t *pmatch = NULL;
const char *msg = "** Ignored with POSIX interface:";

-  if (dat_datctl.cfail[0] != CFAIL_UNSET || dat_datctl.cfail[1] != CFAIL_UNSET)
+  if (dat_datctl.cerror[0] != CFORE_UNSET || dat_datctl.cerror[1] != CFORE_UNSET)
+    prmsg(&msg, "callout_error");
+  if (dat_datctl.cfail[0] != CFORE_UNSET || dat_datctl.cfail[1] != CFORE_UNSET)
     prmsg(&msg, "callout_fail");
   if (dat_datctl.copy_numbers[0] >= 0 || dat_datctl.copy_names[0] != 0)
     prmsg(&msg, "copy");
@@ -7347,7 +7360,8 @@
 def_datctl.oveccount = DEFAULT_OVECCOUNT;
 def_datctl.copy_numbers[0] = -1;
 def_datctl.get_numbers[0] = -1;
-def_datctl.cfail[0] = def_datctl.cfail[1] = CFAIL_UNSET;
+def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET;
+def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET;


/* Scan command line options. */


Modified: code/trunk/testdata/testinput2
===================================================================
--- code/trunk/testdata/testinput2    2016-12-28 12:26:12 UTC (rev 630)
+++ code/trunk/testdata/testinput2    2016-12-28 15:05:48 UTC (rev 631)
@@ -1062,8 +1062,8 @@


 /(?C0)(abc(?C1))*/I
     abcabcabc
-    abcabc\=callout_fail=1:3
-    abcabcabc\=callout_fail=1:3
+    abcabc\=callout_fail=1:4
+    abcabcabc\=callout_fail=1:4


 /(\d{3}(?C))*/I
     123\=callout_capture
@@ -4956,4 +4956,7 @@


/((?(?C'')\Q\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/

+/abcd/auto_callout
+    abcd\=callout_error=255:2
+
 # End of testinput2 


Modified: code/trunk/testdata/testoutput2
===================================================================
--- code/trunk/testdata/testoutput2    2016-12-28 12:26:12 UTC (rev 630)
+++ code/trunk/testdata/testoutput2    2016-12-28 15:05:48 UTC (rev 631)
@@ -3550,7 +3550,7 @@
   1 ^        ^    )*
  0: abcabcabc
  1: abc
-    abcabc\=callout_fail=1:3
+    abcabc\=callout_fail=1:4
 --->abcabc
   0 ^          (
   1 ^  ^       )*
@@ -3557,7 +3557,7 @@
   1 ^     ^    )*
  0: abcabc
  1: abc
-    abcabcabc\=callout_fail=1:3
+    abcabcabc\=callout_fail=1:4
 --->abcabcabc
   0 ^             (
   1 ^  ^          )*
@@ -15439,6 +15439,13 @@


/((?(?C'')\Q\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/

+/abcd/auto_callout
+    abcd\=callout_error=255:2
+--->abcd
+ +0 ^        a
+ +1 ^^       b
+Failed: error -37: callout error code
+
 # End of testinput2 
 Error -63: PCRE2_ERROR_BADDATA (unknown error number)
 Error -62: bad serialized data