[Pcre-svn] [762] code/trunk/src/pcre2_match.c: Tidy comments about UTF case-independence.

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [762] code/trunk/src/pcre2_match.c: Tidy comments about UTF case-independence.
Revision: 762
          http://www.exim.org/viewvc/pcre2?view=rev&revision=762
Author:   ph10
Date:     2017-04-20 17:51:36 +0100 (Thu, 20 Apr 2017)
Log Message:
-----------
Tidy comments about UTF case-independence.


Modified Paths:
--------------
    code/trunk/src/pcre2_match.c


Modified: code/trunk/src/pcre2_match.c
===================================================================
--- code/trunk/src/pcre2_match.c    2017-04-20 16:34:35 UTC (rev 761)
+++ code/trunk/src/pcre2_match.c    2017-04-20 16:51:36 UTC (rev 762)
@@ -929,7 +929,9 @@


     /* ===================================================================== */
     /* Match a single character, caselessly. If we are at the end of the
-    subject, give up immediately. */
+    subject, give up immediately. We get here only when the pattern character 
+    has at most one other case. Characters with more than two cases are coded 
+    as OP_PROP with the pseudo-property PT_CLIST. */


     case OP_CHARI:
     if (Feptr >= mb->end_subject)
@@ -945,10 +947,10 @@
       Fecode++;
       GETCHARLEN(fc, Fecode, Flength);


-      /* If the pattern character's value is < 128, we have only one byte, and
-      we know that its other case must also be one byte long, so we can use the
-      fast lookup table. We know that there is at least one byte left in the
-      subject. */
+      /* If the pattern character's value is < 128, we know that its other case
+      (if any) is also < 128 (and therefore only one code unit long in all 
+      code-unit widths), so we can use the fast lookup table. We checked above
+      that there is at least one character left in the subject. */


       if (fc < 128)
         {
@@ -958,9 +960,10 @@
         Feptr++;
         }


-      /* Otherwise we must pick up the subject character. Note that we cannot
-      use the value of "Flength" to check for sufficient bytes left, because the
-      other case of the character may have more or fewer bytes.  */
+      /* Otherwise we must pick up the subject character and use Unicode 
+      property support to test its other case. Note that we cannot use the
+      value of "Flength" to check for sufficient bytes left, because the other
+      case of the character may have more or fewer code units. */


       else
         {
@@ -967,23 +970,13 @@
         uint32_t dc;
         GETCHARINC(dc, Feptr);
         Fecode += Flength;
-
-        /* If we have Unicode property support, we can use it to test the other
-        case of the character, if there is one. */
-
-        if (fc != dc)
-          {
-#ifdef SUPPORT_UNICODE
-          if (dc != UCD_OTHERCASE(fc))
-#endif
-            RRETURN(MATCH_NOMATCH);
-          }
+        if (dc != fc && dc != UCD_OTHERCASE(fc)) RRETURN(MATCH_NOMATCH);
         }
       }
     else
 #endif   /* SUPPORT_UNICODE */


-    /* Not UTF mode */
+    /* Not UTF mode; use the table for characters < 256. */
       {
       if (TABLE_GET(Fecode[1], mb->lcc, Fecode[1])
           != TABLE_GET(*Feptr, mb->lcc, *Feptr)) RRETURN(MATCH_NOMATCH);