OK, ignore my question. I was reading the docs again and I can see
that the first element is the
matched string and the rest are the substrings.
Thanks
On Mon, Sep 22, 2008 at 11:28 PM, Meir Yanovich <meiry242@???> wrote:
> Thanks for the reply. One more question:
> when I run the code I always get 2 subgroups when I expect to
> get only one,
> the "link" (index.html),
> if I add this code (and modify mine a bit):
> #include <stdio.h> //meat and potatoes
> #include <string.h>
> #include <pcre.h>
> #define OVECCOUNT 30 /* should be a multiple of 3 */
> int main(int argc, char* argv[])
> {
> pcre *re;
> const char *error;
> int erroffset;
> int ovector[OVECCOUNT];
> int rc;
>
> char *regex = "<a[^>]+href\\s*=\\s*[\"']?([^\"'> ]+)";
> char *data = "<a href=\"index.html\">PCRE index page</a>";
> re = pcre_compile(
> regex, /* the pattern */
> 0, /* default options */
> &error, /* for error message */
> &erroffset, /* for error offset */
> NULL); /* use default character table */
> if (! re)
> {
> fprintf(stderr, "PCRE compilation failed at expression offset %d:
> %s\n", erroffset, error);
> return 1;
> }
>
>
> rc = pcre_exec(
> re, /* the compiled pattern */
> NULL, /* no extra data - we didn't study the pattern */
> data, /* the subject string */
> strlen(data), /* the length of the subject */
> 0, /* start at offset 0 in the subject */
> 0, /* default options */
> ovector, /* output vector for substring information */
> OVECCOUNT); /* number of elements in the output
> vector */
> if (rc < 0)
> {
> switch(rc)
> {
> case PCRE_ERROR_NOMATCH:
> printf("No match found in text\n");
> break;
> /*
> More cases defined...
> */
> default:
> printf("Match error %d\n", rc);
> break;
> return 1;
> }
> }
> const char **stringlist;
> int i;
> int result_match = pcre_get_substring_list(data, ovector, rc, &stringlist);
> if (result_match < 0)
> printf("get substring list failed %d\n", result_match);
> else
> {
> for (i = 0; i < rc; i++)
> printf("%2dL %s\n", i, stringlist[i]);
> if (stringlist[i] != NULL)
> printf("string list not terminated by NULL\n");
> /* free((void *)stringlist); */
> pcre_free_substring_list(stringlist);
> }
> }
>
> The output is:
> 0L <a href="index.html
> 1L index.html
>
> But I have only one group in the string, and it's index.html.
>
>
>
>
>
> On Mon, Sep 22, 2008 at 1:17 PM, Philip Hazel <ph10@???> wrote:
>> On Sun, 21 Sep 2008, Meir Yanovich wrote:
>>
>>> char *regex = "<a[^>]+href\s*=\s*[\"']?([^\"'> ]+)";
>>
>> You have made an elementary mistake. Try
>>
>> char *regex = "<a[^>]+href\\s*=\\s*[\"']?([^\"'> ]+)";
>> ^ ^
>> ^ ^
>> ^ ^
>> Philip
>>
>> --
>> Philip Hazel
>>
>