Thanks for the reply. One more question:
when I run the code I always get 2 subgroups when I expect to
get only one:
the "link" (index.html).
This happens if I add this code (and modify mine a bit):
#include <stdio.h> //meat and potatoes
#include <string.h>
#include <pcre.h>
#define OVECCOUNT 30 /* should be a multiple of 3 */
/*
 * Compile a regular expression that captures the href target of an <a> tag,
 * match it once against a fixed sample string, and print every substring
 * that PCRE extracted.
 *
 * Why two lines are printed for a pattern with one capturing group:
 * pcre_exec() returns (highest-numbered group that matched + 1), and entry 0
 * of the substring list is always the text matched by the WHOLE pattern.
 * The capturing groups themselves start at index 1, so one group yields
 * entry 0 (entire match) plus entry 1 (the group).
 *
 * Returns 0 on success, 1 if the pattern fails to compile, the match fails,
 * or the substring list cannot be built.
 */
int main(int argc, char* argv[])
{
    const char *regex = "<a[^>]+href\\s*=\\s*[\"']?([^\"'> ]+)";
    const char *data = "<a href=\"index.html\">PCRE index page</a>";
    const char *error;
    int erroffset;
    int ovector[OVECCOUNT];
    int status = 0;

    /* Command-line arguments are not used by this demo. */
    (void)argc;
    (void)argv;

    pcre *re = pcre_compile(
        regex,      /* the pattern */
        0,          /* default options */
        &error,     /* for error message */
        &erroffset, /* for error offset */
        NULL);      /* use default character table */
    if (!re)
    {
        fprintf(stderr, "PCRE compilation failed at expression offset %d: %s\n",
                erroffset, error);
        return 1;
    }

    int rc = pcre_exec(
        re,                /* the compiled pattern */
        NULL,              /* no extra data - we didn't study the pattern */
        data,              /* the subject string */
        (int)strlen(data), /* the length of the subject */
        0,                 /* start at offset 0 in the subject */
        0,                 /* default options */
        ovector,           /* output vector for substring information */
        OVECCOUNT);        /* number of elements in the output vector */
    if (rc < 0)
    {
        switch (rc)
        {
        case PCRE_ERROR_NOMATCH:
            printf("No match found in text\n");
            break;
        /*
        More cases defined...
        */
        default:
            printf("Match error %d\n", rc);
            break;
        }
        /* Bail out here: a negative rc must never be used as a substring
           count below. */
        pcre_free(re);
        return 1;
    }

    /* rc == 0 means the match succeeded but ovector was too small to hold
       every captured substring; only the first OVECCOUNT/3 pairs are valid. */
    if (rc == 0)
    {
        rc = OVECCOUNT / 3;
        printf("ovector only has room for %d captured substrings\n", rc - 1);
    }

    const char **stringlist;
    int result_match = pcre_get_substring_list(data, ovector, rc, &stringlist);
    if (result_match < 0)
    {
        printf("get substring list failed %d\n", result_match);
        status = 1;
    }
    else
    {
        int i;
        /* Index 0 is the whole match; indices 1..rc-1 are the groups. */
        for (i = 0; i < rc; i++)
        {
            printf("%2dL %s\n", i, stringlist[i]);
        }
        /* After the loop i == rc: the list must end with a NULL sentinel. */
        if (stringlist[i] != NULL)
        {
            printf("string list not terminated by NULL\n");
        }
        pcre_free_substring_list(stringlist);
    }

    pcre_free(re); /* release the compiled pattern */
    return status;
}
The output is:
0L <a href="index.html
1L index.html
But I have only one group in the string, and it's index.html.
On Mon, Sep 22, 2008 at 1:17 PM, Philip Hazel <ph10@???> wrote:
> On Sun, 21 Sep 2008, Meir Yanovich wrote:
>
>> char *regex = "<a[^>]+href\s*=\s*[\"']?([^\"'> ]+)";
>
> You have made an elementary mistake. Try
>
> char *regex = "<a[^>]+href\\s*=\\s*[\"']?([^\"'> ]+)";
> ^ ^
> ^ ^
> ^ ^
> Philip
>
> --
> Philip Hazel
>