Re: [pcre-dev] probably basic pcre question

Top Page
Delete this message
Author: jamal
Date:  
To: pcre-dev
Subject: Re: [pcre-dev] probably basic pcre question

Thanks for taking the time.

On Wed, 2008-12-03 at 15:14 +0000, Philip Hazel wrote:

> That doesn't make any sense. Your pattern "(^: (.*?)(\r\n\r\n))" starts
> matching : at the start of a line; your ovector 0 doesn't. Perhaps you
> copied that wrong.


Apologies - that was a cut-and-paste mistake.

> Is it really meant to be "(^E: (.*?)(\r\n\r\n))"?


Indeed it is.

> If
> so, there's still something wrong, because parentheses number 3 contain
> only \r\n\r\n and yet your ovector buffer 3 contains other stuff.
> Equally, I can't see why #2 should contain the E:.


Sigh - you are right about #2/#3; the proper output looks like:

----
0: E: e1\x0d\x0aA: a1\x0d\x0aB: b1\x0d\x0a\x0d\x0aCRAP: c1
1: E: e1\x0d\x0aA: a1\x0d\x0aB: b1\x0d\x0a\x0d\x0aCRAP: c1
2: e1\x0d\x0aA: a1\x0d\x0aB: b1\x0d\x0a\x0d\x0aCRAP: c1
3: \x0d\x0a\x0d\x0aB: b1\x0d\x0a\x0d\x0aCRAP: c1
----

> Perhaps you should
> post your test program so we can see what the C looks like.


It is part of a larger program. Attached is a reduced version which
won't compile, because I have taken out the network setup part, but it has
all the relevant pcre pieces. I hope that is fine by you.

> Note also that pcre_compile("\r\n"...) isn't the same as pcretest with
> /\r\n/ but that shouldn't actually matter.


OK, that's interesting; is it because different options are passed?

cheers,
jamal
#include <stdio.h>
#include <string.h>
#include <pcre.h>

#include<netdb.h>
#include<unistd.h>
#include<arpa/inet.h>
#include<sys/types.h>
#include<sys/socket.h>
#include<netinet/in.h>

/* Size in bytes of the msg buffer in main(); also the length passed to recv(). */
#define MAX_MSG_SIZE 512
/*
 * Peer addresses. Not referenced by the code shown in this file --
 * presumably consumed by the network setup code (TODO confirm).
 */
#define SERVER_ADDRESS "10.0.0.221"
#define CLIENT_ADDRESS "10.0.0.234"

/* Number of ints in the offset vector handed to pcre_exec(). */
#define OVECCOUNT 30        /* should be a multiple of 3 */


/*
 * Table of patterns that find_amistr() tries, in order, against a message.
 *
 *   type  - numeric tag for the entry (its meaning is not shown in this file)
 *   pname - PCRE pattern source text
 *   plen  - length of pname; 0 here, filled in by setup_amiregs()
 *   re    - compiled pattern; NULL here, filled in by setup_amiregs()
 */
static struct {
    int type;
    const char *pname;
    int plen;
    pcre *re;
} amiregs[] = {
    { .type = 5, .pname = "M: (.*\r\n)",            .plen = 0, .re = NULL },
    { .type = 6, .pname = "R: (.*\r\n)",            .plen = 0, .re = NULL },
    { .type = 8, .pname = "(^E: (.*?)(\r\n\r\n))", .plen = 0, .re = NULL },
};


/*
 * Dump every entry of the amiregs table to stdout: the pattern text, its
 * type tag, the pattern's string length and the compiled-regex pointer.
 */
void print_amiregs()
{
    const size_t count = sizeof(amiregs) / sizeof(amiregs[0]);
    size_t i;

    for (i = 0; i < count; i++) {
        /*
         * strlen() yields size_t, so it must be printed with %zu, and %p
         * requires a void * argument; a mismatch is undefined behaviour.
         */
        printf("string: %s type %d length %zu comp %p\n",
               amiregs[i].pname, amiregs[i].type,
               strlen(amiregs[i].pname), (void *)amiregs[i].re);
    }
}

/*
 * Compile every pattern in the amiregs table.
 *
 * For each entry the pattern length is stored in plen and the pattern is
 * compiled with PCRE_DOTALL into the entry's re field. Processing stops at
 * the first pattern that fails to compile; the entry index, the error
 * offset and PCRE's error text are then printed to stdout.
 *
 * Returns 0 when every pattern compiled, 1 on the first failure.
 */
int setup_amiregs()
{
    const char *errmsg;
    int errpos;
    int idx = 0;

    while (idx < sizeof(amiregs) / sizeof(amiregs[0])) {
        const char *pattern = amiregs[idx].pname;

        amiregs[idx].plen = strlen(pattern);
        amiregs[idx].re = pcre_compile(pattern, PCRE_DOTALL,
                                       &errmsg, &errpos, NULL);

        if (!amiregs[idx].re) {
            printf ("PCRE compile:%d failed offset %d: %s\n",
                    idx, errpos, errmsg);
            return 1;
        }
        idx++;
    }

    return 0;
}


int find_amistr(char *subject, int subject_length,  int *ovector, int *ami_i)
{
    int i, rc = 0;
    pcre *re;
    *ami_i = -1;


    for (i = 0; i < sizeof(amiregs) / sizeof(amiregs[0]); i++) {


        re = amiregs[i].re;
        rc = pcre_exec(re,
                   NULL,
                   subject,
                   subject_length, 
                   0, 
                   0,
                   ovector, 
                   OVECCOUNT);


        if (rc >= 0) {
            *ami_i = i;
            break;
        }
    }


    return rc;
}


/*
 * Release every compiled regex held in the amiregs table.
 *
 * Entries that were never compiled (re == NULL, e.g. after setup_amiregs()
 * stopped early) are skipped, and each freed pointer is reset to NULL so
 * the table holds no dangling pointers and a second call is harmless.
 */
void cleanup_amiregs()
{
    size_t i;

    for (i = 0; i < sizeof(amiregs) / sizeof(amiregs[0]); i++) {
        if (amiregs[i].re == NULL)
            continue;
        pcre_free(amiregs[i].re);
        amiregs[i].re = NULL;
    }
}

/*
 * Print the substrings described by the first rc offset pairs of ovector.
 *
 *   subject - buffer the offsets refer to
 *   ovector - offset pairs; pair i is (ovector[2*i], ovector[2*i + 1]),
 *             i.e. start offset and one-past-the-end offset
 *   rc      - number of pairs to print
 *
 * Each line shows the pair index, the substring length and the substring
 * itself followed by "##". A pair with a negative start (or an end before
 * its start) is reported as "<unset>" instead of being dereferenced.
 */
void print_amistr(char *subject, int *ovector, int rc)
{
    int i;

    for (i = 0; i < rc; i++) {
        int start = ovector[2 * i];
        int end = ovector[2 * i + 1];

        if (start < 0 || end < start) {
            printf("%2d: <unset>\n", i);
            continue;
        }

        /*
         * A captured span is not NUL-terminated at its end, so a plain %s
         * would run on to the end of the subject; %.*s bounds the output
         * to exactly the captured length.
         */
        printf("%2d: %d %.*s##\n", i, end - start, end - start,
               subject + start);
    }
}

/*
 * Compile the patterns, obtain a socket from net_setup(), send one request
 * and then print the captures of every received message that matches one
 * of the patterns.
 *
 * NOTE(review): net_setup() is not defined in this file; it is expected to
 * return a socket descriptor, negative on failure.
 *
 * Returns 1 if regex setup, net_setup(), send() or recv() fails, and 0 when
 * the peer closes the connection.
 */
int main(int argc, char **argv)
{
    /* Zero-filled so the placeholder send below never reads garbage. */
    char msg[MAX_MSG_SIZE] = {0};
    int sd = -1, ms = 0, seq = 0;
    char *subject;
    int ovector[OVECCOUNT];
    int rc, i = 0;
    int status = 0;

    (void)argc;
    (void)argv;

    rc = setup_amiregs();
    if (rc != 0) {
        printf("Failed to setup regexps %d\n", rc);
        cleanup_amiregs();
        return 1;
    }

    print_amiregs();

    sd = net_setup();
    if (sd < 0) {
        cleanup_amiregs();
        return 1;
    }

    //initialize the message to server somewhere here
    // send ....
    if (send(sd, msg, strlen(msg) + 1, 0) < 0) {
        cleanup_amiregs();
        close(sd);
        return 1;
    }

    /* Receive and process responses */
    while (1) {
        /* Leave one byte free so the data can be NUL-terminated for %s. */
        ms = recv(sd, msg, MAX_MSG_SIZE - 1, 0);
        if (ms <= 0) {
            /* 0: peer closed the connection; negative: receive error. */
            if (ms < 0)
                status = 1;
            break;
        }
        msg[ms] = '\0';

        //printf("\nreceived(%d): **\n %s\n**\n",ms, msg);
        subject = msg;
        rc = find_amistr(subject, ms, ovector, &i);
        if (rc >= 0) {
            /* print the vectors that matched ...*/
            print_amistr(subject, ovector, rc);
            /* Match length is the span of the whole match (pair 0). */
            printf("\n%d Match len %d at offset %d next %d amindex %d\n",
                   seq, ovector[1] - ovector[0], ovector[0], ovector[1], i);
        } else {
            printf("\nMatch failed for <<%s>> \n", subject);
        }

        if (rc == 0) {
            rc = OVECCOUNT / 3;
            printf("ovector only has room for %d captured substrings\n", rc - 1);
        }

        seq++;
        memset(msg, 0, ms);
    }

    cleanup_amiregs();
    close(sd);
    return status;
}