Hello all,
I'm new to PCRE. I'm trying to embed the engine into my application, but
I have a problem
running a regexp that captures the link out of an href attribute.
This is my regexp:
char *regex = "<a[^>]+href\s*=\s*[\"']?([^\"'> ]+)";
And this is my string data:
char *data = "<a href=\"index.html\">PCRE index page</a>";
Please note that this regexp is valid and working; you can check it
here: http://www.gskinner.com/RegExr/
Here is my sample code:
#include <stdio.h> //meat and potatoes
#include <string.h>
#include <pcre.h>
#define OVECCOUNT 30 /* should be a multiple of 3 */
/*
 * Compile a pattern that captures the value of the href attribute of an
 * <a> tag, run it against a sample anchor element, and print the captured
 * link.
 *
 * Returns 0 when the link was captured and printed; returns 1 when the
 * pattern fails to compile, the match fails, or the capture group is unset.
 */
int main(int argc, char* argv[])
{
    /*
     * The backslashes are doubled: in a C string literal "\s" is not a valid
     * escape sequence, so the regex engine would never receive the intended
     * \s (whitespace) class. "\\s" puts a literal backslash + 's' in the
     * string, which is what the pattern needs.
     */
    const char *regex = "<a[^>]+href\\s*=\\s*[\"']?([^\"'> ]+)";
    const char *data = "<a href=\"index.html\">PCRE index page</a>";
    const char *error = NULL;
    int erroffset = 0;
    int ovector[OVECCOUNT];
    pcre *re;
    int rc;

    (void)argc;
    (void)argv;

    re = pcre_compile(
        regex,      /* the pattern */
        0,          /* default options */
        &error,     /* for error message */
        &erroffset, /* for error offset */
        NULL);      /* use default character table */
    if (re == NULL) {
        fprintf(stderr, "PCRE compilation failed at expression offset %d: %s\n",
                erroffset, error);
        return 1;
    }

    rc = pcre_exec(
        re,                /* the compiled pattern */
        NULL,              /* no extra data - we didn't study the pattern */
        data,              /* the subject string */
        (int)strlen(data), /* the length of the subject */
        0,                 /* start at offset 0 in the subject */
        0,                 /* default options */
        ovector,           /* output vector for substring information */
        OVECCOUNT);        /* number of elements in the output vector */

    if (rc < 0) {
        switch (rc) {
        case PCRE_ERROR_NOMATCH:
            printf("No match found in text\n");
            break;
        /*
        More cases defined...
        */
        default:
            printf("Match error %d\n", rc);
            break;
        }
        /* Any negative result is a failure; do not fall into the success path. */
        pcre_free(re);
        return 1;
    }

    /*
     * The pattern has exactly one capture group, so a full match yields
     * rc == 2 (whole match + group 1). rc == 0 would mean the output vector
     * was too small to hold the captured substrings.
     */
    if (rc < 2 || ovector[2] < 0) {
        printf("Match did not catch all the groups\n");
        pcre_free(re);
        return 1;
    }

    /* ovector[0]..ovector[1] are the entire matched string,
       ovector[2]..ovector[3] are capture group 1 (the link). */
    const char *link_start = data + ovector[2];
    int link_length = ovector[3] - ovector[2];

    /* Finally, print the match */
    printf("Link: %.*s\n", link_length, link_start);

    pcre_free(re);
    return 0;
} //END main
Thanks for helping