[Pcre-svn] [640] code/trunk: Upgrade pcre2grep for Windows c…

Top Page
Delete this message
Author: Subversion repository
Date:  
To: pcre-svn
Subject: [Pcre-svn] [640] code/trunk: Upgrade pcre2grep for Windows courtesy of Jason Hood.
Revision: 640
          http://www.exim.org/viewvc/pcre2?view=rev&revision=640
Author:   ph10
Date:     2016-12-31 17:40:45 +0000 (Sat, 31 Dec 2016)
Log Message:
-----------
Upgrade pcre2grep for Windows courtesy of Jason Hood.


Modified Paths:
--------------
    code/trunk/ChangeLog
    code/trunk/configure.ac
    code/trunk/doc/pcre2grep.1
    code/trunk/src/pcre2grep.c
    code/trunk/testdata/grepoutput


Modified: code/trunk/ChangeLog
===================================================================
--- code/trunk/ChangeLog    2016-12-31 14:17:14 UTC (rev 639)
+++ code/trunk/ChangeLog    2016-12-31 17:40:45 UTC (rev 640)
@@ -284,7 +284,29 @@
 45. Minor cosmetic fix to pcre2test: move a variable that is not used under 
 Windows into the "not Windows" code.


+46. Applied Jason Hood's patches to upgrade pcre2grep under Windows and tidy
+some of the code:

+  * normalised the Windows condition by ensuring WIN32 is defined;
+  * enabled the callout feature under Windows;
+  * added globbing (Microsoft's implementation expands quoted args),
+    using a tweaked opendirectory;
+  * implemented the is_*_tty functions for Windows;
+  * --color=always now writes ANSI sequences when output is not a terminal;
+  * added sequences 4 (underline works on Win10) and 5 (blink as bright
+    background, relatively standard on DOS/Win);
+  * removed the (char *) casts for the now-const strings;
+  * removed GREP_COLOUR (grep's command line allowed the 'u', but not
+    the environment), parsing GREP_COLORS instead;
+  * the current colour is now used if none is set, rather than black;
+  * added print_match for the undefined case;
+  * fixed a typo.
+
+In addition, colour settings containing anything other than digits and
+semicolon are ignored, and the colour controls are no longer output for empty 
+strings.
+
+
 Version 10.22 29-July-2016
 --------------------------



Modified: code/trunk/configure.ac
===================================================================
--- code/trunk/configure.ac    2016-12-31 14:17:14 UTC (rev 639)
+++ code/trunk/configure.ac    2016-12-31 17:40:45 UTC (rev 640)
@@ -148,16 +148,11 @@
                              [disable JIT support in pcre2grep]),
               , enable_pcre2grep_jit=yes)


-# Handle --disable-pcre2grep-callout (enabled by default) but not supported
-# for Windows.
-if test "$HAVE_WINDOWS_H" != "1"; then
-  AC_ARG_ENABLE(pcre2grep-callout,
-                AS_HELP_STRING([--disable-pcre2grep-callout],
-                               [disable callout script support in pcre2grep]),
-                , enable_pcre2grep_callout=yes)
-else
-  enable_pcre2grep_callout=no
-fi
+# Handle --disable-pcre2grep-callout (enabled by default)
+AC_ARG_ENABLE(pcre2grep-callout,
+              AS_HELP_STRING([--disable-pcre2grep-callout],
+                             [disable callout script support in pcre2grep]),
+              , enable_pcre2grep_callout=yes)


 # Handle --enable-rebuild-chartables
 AC_ARG_ENABLE(rebuild-chartables,
@@ -577,19 +572,14 @@
     have no effect unless SUPPORT_JIT is also defined.])
 fi


-# Currently pcre2grep callout string is not supported under Windows.
-
 if test "$enable_pcre2grep_callout" = "yes"; then
   if test "$HAVE_WINDOWS_H" != "1"; then
     if test "$HAVE_SYS_WAIT_H" != "1"; then
       AC_MSG_ERROR([Callout script support needs sys/wait.h.])
     fi
-    AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT], [], [
-      Define to any value to enable callout script support in pcre2grep.])
-  else
-    AC_MSG_WARN([Callout script support is not available for Windows: disabled])
-    enable_pcre2grep_callout=no
   fi
+  AC_DEFINE([SUPPORT_PCRE2GREP_CALLOUT], [], [
+    Define to any value to enable callout script support in pcre2grep.])
 fi


if test "$enable_unicode" = "yes"; then

Modified: code/trunk/doc/pcre2grep.1
===================================================================
--- code/trunk/doc/pcre2grep.1    2016-12-31 14:17:14 UTC (rev 639)
+++ code/trunk/doc/pcre2grep.1    2016-12-31 17:40:45 UTC (rev 640)
@@ -1,4 +1,4 @@
-.TH PCRE2GREP 1 "31 October 2016" "PCRE2 10.23"
+.TH PCRE2GREP 1 "31 December 2016" "PCRE2 10.23"
 .SH NAME
 pcre2grep - a grep with Perl-compatible regular expressions.
 .SH SYNOPSIS
@@ -205,13 +205,22 @@
 because \fBpcre2grep\fP has to search for all possible matches in a line, not
 just one, in order to colour them all.
 .sp
-The colour that is used can be specified by setting the environment variable
-PCRE2GREP_COLOUR or PCRE2GREP_COLOR. If neither of these are set,
-\fBpcre2grep\fP looks for GREP_COLOUR or GREP_COLOR. The value of the variable
-should be a string of two numbers, separated by a semicolon. They are copied
-directly into the control string for setting colour on a terminal, so it is
-your responsibility to ensure that they make sense. If neither of the
-environment variables is set, the default is "1;31", which gives red.
+The colour that is used can be specified by setting one of the environment
+variables PCRE2GREP_COLOUR, PCRE2GREP_COLOR, PCREGREP_COLOUR, or
+PCREGREP_COLOR, which are checked in that order. If none of these are set,
+\fBpcre2grep\fP looks for GREP_COLORS or GREP_COLOR (in that order). The value
+of the variable should be a string of two numbers, separated by a semicolon, 
+except in the case of GREP_COLORS, which must contain "ms=" or "mt="
+followed by two semicolon-separated colours, terminated by the end of the
+string or by a colon. If GREP_COLORS contains neither "ms=" nor "mt=" it is
+ignored, and GREP_COLOR is checked.
+.sp
+If the string obtained from one of the above variables contains any characters
+other than semicolon or digits, the setting is ignored and the default colour 
+is used. The string is copied directly into the control string for setting
+colour on a terminal, so it is your responsibility to ensure that the values
+make sense. If no relevant environment variable is set, the default is "1;31",
+which gives red.
 .TP
 \fB-D\fP \fIaction\fP, \fB--devices=\fP\fIaction\fP
 If an input path is not a regular file or a directory, "action" specifies how
@@ -688,12 +697,12 @@
 .SH "CALLING EXTERNAL SCRIPTS"
 .rs
 .sp
-On non-Windows systems, \fBpcre2grep\fP has, by default, support for calling
-external programs or scripts during matching by making use of PCRE2's callout
-facility. However, this support can be disabled when \fBpcre2grep\fP is built.
-You can find out whether your binary has support for callouts by running it
-with the \fB--help\fP option. If the support is not enabled, all callouts in
-patterns are ignored by \fBpcre2grep\fP.
+\fBpcre2grep\fP has, by default, support for calling external programs or
+scripts during matching by making use of PCRE2's callout facility. However,
+this support can be disabled when \fBpcre2grep\fP is built. You can find out
+whether your binary has support for callouts by running it with the \fB--help\fP
+option. If the support is not enabled, all callouts in patterns are ignored by
+\fBpcre2grep\fP.
 .P
 A callout in a PCRE2 pattern is of the form (?C<arg>) where the argument is
 either a number or a quoted string (see the
@@ -784,6 +793,6 @@
 .rs
 .sp
 .nf
-Last updated: 31 October 2016
+Last updated: 31 December 2016
 Copyright (c) 1997-2016 University of Cambridge.
 .fi


Modified: code/trunk/src/pcre2grep.c
===================================================================
--- code/trunk/src/pcre2grep.c    2016-12-31 14:17:14 UTC (rev 639)
+++ code/trunk/src/pcre2grep.c    2016-12-31 17:40:45 UTC (rev 640)
@@ -58,14 +58,22 @@
 #include <sys/types.h>
 #include <sys/stat.h>


-#if defined(_WIN32) || defined(WIN32)
+#if (defined _WIN32 || (defined HAVE_WINDOWS_H && HAVE_WINDOWS_H)) && !defined WIN32
+#define WIN32
+#endif
+
+#ifdef WIN32
 #include <io.h>                /* For _setmode() */
 #include <fcntl.h>             /* For _O_BINARY */
 #endif


#ifdef SUPPORT_PCRE2GREP_CALLOUT
+#ifdef WIN32
+#include <process.h>
+#else
#include <sys/wait.h>
#endif
+#endif

#ifdef HAVE_UNISTD_H
#include <unistd.h>
@@ -135,7 +143,7 @@
any messages written to stdout must have \r\n as their line terminator. This is
handled by using STDOUT_NL as the newline string. */

-#if defined(_WIN32) || defined(WIN32)
+#ifdef WIN32
#define STDOUT_NL "\r\n"
#else
#define STDOUT_NL "\n"
@@ -158,14 +166,14 @@
static const char *jfriedl_postfix = "";
#endif

-static const char *colour_string = (char *)"1;31";
+static const char *colour_string = "1;31";
static const char *colour_option = NULL;
static const char *dee_option = NULL;
static const char *DEE_option = NULL;
static const char *locale = NULL;
static const char *newline_arg = NULL;
-static const char *om_separator = (char *)"";
-static const char *stdin_name = (char *)"(standard input)";
+static const char *om_separator = "";
+static const char *stdin_name = "(standard input)";

static char *main_buffer = NULL;

@@ -180,7 +188,7 @@
static int total_count = 0;
static int counts_printed = 0;

-#if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
+#ifdef WIN32
static int dee_action = dee_SKIP;
#else
static int dee_action = dee_READ;
@@ -209,6 +217,9 @@

static BOOL count_only = FALSE;
static BOOL do_colour = FALSE;
+#ifdef WIN32
+static BOOL do_ansi = FALSE;
+#endif
static BOOL file_offsets = FALSE;
static BOOL hyphenpending = FALSE;
static BOOL invert = FALSE;
@@ -463,7 +474,35 @@
}


+/*************************************************
+*         Parse GREP_COLORS                      *
+*************************************************/


+/* Extract ms or mt from GREP_COLORS.
+
+Argument:  the string, possibly NULL
+Returns:   the value of ms or mt, or NULL if neither present
+*/
+
+static char *
+parse_grep_colors(const char *gc)
+{
+static char seq[16];
+char *col;
+uint32_t len;
+if (gc == NULL) return NULL;
+col = strstr(gc, "ms=");
+if (col == NULL) col = strstr(gc, "mt=");
+if (col == NULL) return NULL;
+len = 0;
+col += 3;
+while (*col != ':' && *col != 0 && len < sizeof(seq)-1)
+  seq[len++] = *col++;
+seq[len] = 0;
+return seq;
+}
+
+
 /*************************************************
 *         Exit from the program                  *
 *************************************************/
@@ -691,6 +730,7 @@
 static void
 print_match(const char* buf, int length)
 {
+if (length == 0) return;
 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
 FWRITE(buf, 1, length, stdout);
 if (do_colour) fprintf(stdout, "%c[0m", 0x1b);
@@ -704,11 +744,9 @@
 /* I (Philip Hazel) have no means of testing this code. It was contributed by
 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
 when it did not exist. David Byron added a patch that moved the #include of
-<windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
-The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
-undefined when it is indeed undefined. */
+<windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after. */


-#elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
+#elif defined WIN32

#ifndef STRICT
# define STRICT
@@ -723,6 +761,11 @@
#define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
#endif

+/* Allow opendirectory to provide globbing, since Microsoft started doing it
+wrong (expanding quoted arguments). */
+
+#define iswild(name) (strpbrk(name, "*?") != NULL)
+
typedef struct directory_type
{
HANDLE handle;
@@ -757,7 +800,10 @@
pcre2grep_exit(2);
}
memcpy(pattern, filename, len);
-memcpy(&(pattern[len]), "\\*", 3);
+if (iswild(filename))
+ pattern[len] = 0;
+else
+ memcpy(&(pattern[len]), "\\*", 3);
dir->handle = FindFirstFile(pattern, &(dir->data));
if (dir->handle != INVALID_HANDLE_VALUE)
{
@@ -815,18 +861,16 @@

/************* Test for a terminal in Windows **********/

-/* I don't know how to do this; assume never */
-
static BOOL
is_stdout_tty(void)
{
-return FALSE;
+return _isatty(_fileno(stdout));
}

static BOOL
is_file_tty(FILE *f)
{
-return FALSE;
+return _isatty(_fileno(f));
}


@@ -839,9 +883,18 @@
 static void
 print_match(const char* buf, int length)
 {
-if (do_colour) SetConsoleTextAttribute(hstdout, match_colour);
+if (length == 0) return;
+if (do_colour)
+  {
+  if (do_ansi) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
+    else SetConsoleTextAttribute(hstdout, match_colour);
+  }
 FWRITE(buf, 1, length, stdout);
-if (do_colour) SetConsoleTextAttribute(hstdout, csbi.wAttributes);
+if (do_colour)
+  {
+  if (do_ansi) fprintf(stdout, "%c[00m", 0x1b);
+    else SetConsoleTextAttribute(hstdout, csbi.wAttributes);
+  }
 }


/* Convert ANSI BGR format to RGB used by Windows */
@@ -848,9 +901,9 @@
#define BGR_RGB(x) ((x & 1 ? 4 : 0) | (x & 2) | (x & 4 ? 1 : 0))

 static WORD
-decode_ANSI_colour(char *cs)
+decode_ANSI_colour(const char *cs)
 {
-WORD result = 0;
+WORD result = csbi.wAttributes;
 while (*cs)
   {
   if (isdigit(*cs))
@@ -857,6 +910,8 @@
     {
     int code = atoi(cs);
     if (code == 1) result |= 0x08;
+    else if (code == 4) result |= 0x8000;
+    else if (code == 5) result |= 0x80;
     else if (code >= 30 && code <= 37) result = (result & 0xF8) | BGR_RGB(code - 30);
     else if (code == 39) result = (result & 0xF0) | (csbi.wAttributes & 0x0F);
     else if (code >= 40 && code <= 47) result = (result & 0x8F) | (BGR_RGB(code - 40) << 4);
@@ -880,8 +935,14 @@
 if (do_colour)
   {
   hstdout = GetStdHandle(STD_OUTPUT_HANDLE);
-  GetConsoleScreenBufferInfo(hstdout, &csbi);
-
+  /* This fails when redirected to con; try again if so. */
+  if (!GetConsoleScreenBufferInfo(hstdout, &csbi) && !do_ansi)
+    {
+    HANDLE hcon = CreateFile("CONOUT$", GENERIC_READ | GENERIC_WRITE,
+      FILE_SHARE_WRITE, NULL, OPEN_EXISTING, 0, NULL);
+    GetConsoleScreenBufferInfo(hcon, &csbi);
+    CloseHandle(hcon);
+    }
   match_colour = decode_ANSI_colour(colour_string);
   /* No valid colour found - turn off colouring */
   if (!match_colour) do_colour = FALSE;
@@ -927,6 +988,16 @@
 return FALSE;
 }


+
+/************* Print a match without colouring (not supported here) **********/
+
+static void
+print_match(const char* buf, int length)
+{
+if (length == 0) return;
+FWRITE(buf, 1, length, stdout);
+}
+
#endif /* End of system-specific functions */


@@ -1637,7 +1708,9 @@
char *argsptr;
char **argsvector;
char **argsvectorptr;
+#ifndef WIN32
pid_t pid;
+#endif
int result = 0;

(void)unused; /* Avoid compiler warning */
@@ -1825,6 +1898,9 @@
*argsptr++ = '\0';
*argsvectorptr = NULL;

+#ifdef WIN32
+result = _spawnvp(_P_WAIT, argsvector[0], (const char * const *)argsvector);
+#else
pid = fork();

if (pid == 0)
@@ -1835,6 +1911,7 @@
}
else if (pid > 0)
(void)waitpid(pid, &result, 0);
+#endif

 free(args);
 free(argsvector);
@@ -2635,6 +2712,36 @@
     }
   }


+#ifdef WIN32
+if (iswild(pathname))
+  {
+  char buffer[1024];
+  char *nextfile;
+  char *name;
+  directory_type *dir = opendirectory(pathname);
+
+  if (dir == NULL)
+    return 0;
+
+  for (nextfile = name = pathname; *nextfile != 0; nextfile++)
+    if (*nextfile == '/' || *nextfile == '\\')
+      name = nextfile + 1;
+  *name = 0;
+
+  while ((nextfile = readdirectory(dir)) != NULL)
+    {
+    int frc;
+    sprintf(buffer, "%.512s%.128s", pathname, nextfile);
+    frc = grep_or_recurse(buffer, dir_recurse, FALSE);
+    if (frc > 1) rc = frc;
+     else if (frc == 0 && rc == 1) rc = 0;
+    }
+
+  closedirectory(dir);
+  return rc;
+  }
+#endif
+
 #if defined NATIVE_ZOS
  }
 #endif
@@ -3057,7 +3164,7 @@
 that stdout is a binary stream. Note that this means all other output to stdout
 must use STDOUT_NL to terminate lines. */


-#if defined(_WIN32) || defined(WIN32)
+#ifdef WIN32
_setmode( _fileno(stdout), _O_BINARY);
#endif

@@ -3281,7 +3388,7 @@
     switch (op->one_char)
       {
       case N_COLOUR:
-      colour_option = (char *)"auto";
+      colour_option = "auto";
       break;


       case 'o':
@@ -3446,17 +3553,16 @@
   pcre2_set_character_tables(compile_context, character_tables);
   }


-/* Sort out colouring. On non-Windows systems "auto" causes colouring only if
-the output is a terminal. On Windows systems "auto" is the same as "always". */
+/* Sort out colouring */

 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
   {
-  if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
-#if defined(_WIN32) || defined(WIN32)
-  else if (strcmp(colour_option, "auto") == 0) do_colour = TRUE;
-#else
+  if (strcmp(colour_option, "always") == 0)
+#ifdef WIN32
+    do_ansi = !is_stdout_tty(),
+#endif
+    do_colour = TRUE;
   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
-#endif
   else
     {
     fprintf(stderr, "pcre2grep: Unknown colour setting \"%s\"\n",
@@ -3467,10 +3573,15 @@
     {
     char *cs = getenv("PCRE2GREP_COLOUR");
     if (cs == NULL) cs = getenv("PCRE2GREP_COLOR");
-    if (cs == NULL) cs = getenv("GREP_COLOUR");
+    if (cs == NULL) cs = getenv("PCREGREP_COLOUR");
+    if (cs == NULL) cs = getenv("PCREGREP_COLOR");
+    if (cs == NULL) cs = parse_grep_colors(getenv("GREP_COLORS"));
     if (cs == NULL) cs = getenv("GREP_COLOR");
-    if (cs != NULL) colour_string = cs;
-#if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
+    if (cs != NULL) 
+      {
+      if (strspn(cs, ";0123456789") == strlen(cs)) colour_string = cs;
+      } 
+#ifdef WIN32
     init_colour_output();
 #endif
     }


Modified: code/trunk/testdata/grepoutput
===================================================================
(Binary files differ)