[Pcre-svn] [895] code/trunk/doc: Documentation update.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [895] code/trunk/doc: Documentation update.
Revision: 895
          http://www.exim.org/viewvc/pcre2?view=rev&revision=895
Author:   ph10
Date:     2017-12-24 10:27:13 +0000 (Sun, 24 Dec 2017)
Log Message:
-----------
Documentation update.


Modified Paths:
--------------
    code/trunk/doc/html/pcre2demo.html
    code/trunk/doc/pcre2demo.3


Modified: code/trunk/doc/html/pcre2demo.html
===================================================================
--- code/trunk/doc/html/pcre2demo.html    2017-12-23 17:15:51 UTC (rev 894)
+++ code/trunk/doc/html/pcre2demo.html    2017-12-24 10:27:13 UTC (rev 895)
@@ -228,6 +228,21 @@
 if (rc == 0)
   printf("ovector was not big enough for all the captured substrings\n");


+/* We must guard against patterns such as /(?=.\K)/ that use \K in an assertion
+to set the start of a match later than its end. In this demonstration program,
+we just detect this case and give up. */
+
+if (ovector[0] > ovector[1])
+  {
+  printf("\\K was used in an assertion to set the match start after its end.\n"
+    "From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]),
+      (char *)(subject + ovector[1]));
+  printf("Run abandoned\n");
+  pcre2_match_data_free(match_data);
+  pcre2_code_free(re);
+  return 1;
+  }
+
 /* Show substrings stored in the output vector by number. Obviously, in a real
 application you might want to do things other than print them. */


@@ -355,6 +370,29 @@
     options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
     }


+  /* If the previous match was not an empty string, there is one tricky case to
+  consider. If a pattern contains \K within a lookbehind assertion at the
+  start, the end of the matched string can be at the offset where the match
+  started. Without special action, this leads to a loop that keeps on matching
+  the same substring. We must detect this case and arrange to move the start on
+  by one character. The pcre2_get_startchar() function returns the starting
+  offset that was passed to pcre2_match(). */
+
+  else
+    {
+    PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
+    if (start_offset <= startchar)
+      {
+      if (startchar >= subject_length) break;   /* Reached end of subject.   */
+      start_offset = startchar + 1;             /* Advance by one character. */
+      if (utf8)                                 /* If UTF-8, it may be more  */
+        {                                       /*   than one code unit.     */
+        for (; start_offset < subject_length; start_offset++)
+          if ((subject[start_offset] & 0xc0) != 0x80) break;
+        }
+      }
+    }
+
   /* Run the next matching operation */


   rc = pcre2_match(
@@ -419,6 +457,21 @@
   if (rc == 0)
     printf("ovector was not big enough for all the captured substrings\n");


+  /* We must guard against patterns such as /(?=.\K)/ that use \K in an
+  assertion to set the start of a match later than its end. In this
+  demonstration program, we just detect this case and give up. */
+
+  if (ovector[0] > ovector[1])
+    {
+    printf("\\K was used in an assertion to set the match start after its end.\n"
+      "From end to start the match was: %.*s\n", (int)(ovector[0] - ovector[1]),
+        (char *)(subject + ovector[1]));
+    printf("Run abandoned\n");
+    pcre2_match_data_free(match_data);
+    pcre2_code_free(re);
+    return 1;
+    }
+
   /* As before, show substrings stored in the output vector by number, and then
   also any named substrings. */



Modified: code/trunk/doc/pcre2demo.3
===================================================================
--- code/trunk/doc/pcre2demo.3    2017-12-23 17:15:51 UTC (rev 894)
+++ code/trunk/doc/pcre2demo.3    2017-12-24 10:27:13 UTC (rev 895)
@@ -228,6 +228,21 @@
 if (rc == 0)
   printf("ovector was not big enough for all the captured substrings\en");


+/* We must guard against patterns such as /(?=.\eK)/ that use \eK in an assertion
+to set the start of a match later than its end. In this demonstration program,
+we just detect this case and give up. */
+
+if (ovector[0] > ovector[1])
+  {
+  printf("\e\eK was used in an assertion to set the match start after its end.\en"
+    "From end to start the match was: %.*s\en", (int)(ovector[0] - ovector[1]),
+      (char *)(subject + ovector[1]));
+  printf("Run abandoned\en");
+  pcre2_match_data_free(match_data);
+  pcre2_code_free(re);
+  return 1;
+  }
+
 /* Show substrings stored in the output vector by number. Obviously, in a real
 application you might want to do things other than print them. */


@@ -355,6 +370,29 @@
     options = PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED;
     }


+  /* If the previous match was not an empty string, there is one tricky case to
+  consider. If a pattern contains \eK within a lookbehind assertion at the
+  start, the end of the matched string can be at the offset where the match
+  started. Without special action, this leads to a loop that keeps on matching
+  the same substring. We must detect this case and arrange to move the start on
+  by one character. The pcre2_get_startchar() function returns the starting
+  offset that was passed to pcre2_match(). */
+
+  else
+    {
+    PCRE2_SIZE startchar = pcre2_get_startchar(match_data);
+    if (start_offset <= startchar)
+      {
+      if (startchar >= subject_length) break;   /* Reached end of subject.   */
+      start_offset = startchar + 1;             /* Advance by one character. */
+      if (utf8)                                 /* If UTF-8, it may be more  */
+        {                                       /*   than one code unit.     */
+        for (; start_offset < subject_length; start_offset++)
+          if ((subject[start_offset] & 0xc0) != 0x80) break;
+        }
+      }
+    }
+
   /* Run the next matching operation */


   rc = pcre2_match(
@@ -419,6 +457,21 @@
   if (rc == 0)
     printf("ovector was not big enough for all the captured substrings\en");


+  /* We must guard against patterns such as /(?=.\eK)/ that use \eK in an
+  assertion to set the start of a match later than its end. In this
+  demonstration program, we just detect this case and give up. */
+
+  if (ovector[0] > ovector[1])
+    {
+    printf("\e\eK was used in an assertion to set the match start after its end.\en"
+      "From end to start the match was: %.*s\en", (int)(ovector[0] - ovector[1]),
+        (char *)(subject + ovector[1]));
+    printf("Run abandoned\en");
+    pcre2_match_data_free(match_data);
+    pcre2_code_free(re);
+    return 1;
+    }
+
   /* As before, show substrings stored in the output vector by number, and then
   also any named substrings. */