srci prefer mine ...

About

This library allows easy parsing of grammars written in ABNF, by providing a “compilable metalanguage” as a set of C macros. The library automatically builds a C binary tree of the grammar when the parsed text matches the syntax. Using the library consists of writing the ABNF grammar rules as C functions (one function per rule). Rule functions must have the following C prototype (literally the same, except for the function name - i.e. the State * parameter must be named st - this is required for the macros to work properly) : > void rulename(State *st);

The function body shouldn’t contain anything other than blocks (see the Blocks section), which describe the rule content. Blocks must be used as-is inside rule functions (i.e. no initialization step or local variable declarations are needed).

The use of the parser goes through a State struct which must be defined and initialized by the calling function (e.g. main()). This struct makes it possible to transmit the text to be parsed, as well as the subsequent retrieval of the syntax tree.

NB: The text must be syntactically correct (i.e. st.status == MATCH) to return a valid syntax tree. If the text is syntactically incorrect, st.status == FAIL and the syntax tree is irrelevant.

The definition of State is as follows:

typedef struct {
    Node *node;     /* syntax tree */
    Node *left;
    char *current;  /* next character to be parsed */
    int len;        /* len of text to be parsed  */
    int status;     /* syntax matching status (FAIL or MATCH) */
} State;

A typical State initialization would be:

int
main(int argc, char *argv[])
{
    char *s = "foo";
    State st = { NULL, NULL, s, strlen(s), MATCH };
    return 0;
}

The next step is to pass the State struct to the “start rule” (i.e. the one that does not appear on the right-hand side of any ABNF rule) :

static void  start(State *);

int
main(int argc, char *argv[])
{
    char *s = "foo";
    State st = { NULL, NULL, s, strlen(s), MATCH };

    start(&st);
    return 0;
}

static void
start(State *st)
{
    STR(foo)
}

The library provides the printtree(Node *tree, int indent) function to print the returned syntax tree (where tree is the syntax tree, and indent is the base indentation, in number of horizontal tabs) :

static void  start(State *);

int
main(int argc, char *argv[])
{
    char *s = "foo";
    State st = { NULL, NULL, s, strlen(s), MATCH };

    start(&st);
    if (st.status == MATCH)
        printtree(st.node, 0);
    return 0;
}

static void
start(State *st)
{
    STR(foo)
}
% ./main
string: "foo"

NB

The following ABNF basic rules are provided by the library as rule functions (see RFC 5234 - Appendix B.1.):

void     alpha(State *);
void     bit(State *);
void     crlf(State *);
void     digit(State *);
void     dquote(State *);
void     hexdig(State *);
void     htab(State *);
void     lpar(State *);
void     lwsp(State *);
void     octet(State *);
void     rpar(State *);
void     sp(State *);
void     vchar(State *);
void     wsp(State *);

Blocks

ASCIIRANGE(n, m)

ABNF

C

void
rulename(State *st)
{
   /* rulename = %xn-m */
   ASCIIRANGE(n, m)
   /* rulename = %xn */
   ASCIIRANGE(n, n)
}

STR(s)

STR() is case-insensitive; use ASCIIRANGE() for case-sensitive matching.

ABNF

C

void
rulename(State *st)
{
    STR(s)
}

CHOOSE(block) & OR(block)

ABNF

C

void
rulename(State *st)
{
    CHOOSE(
        OR(block1)
        OR(block2)
        ...
        OR(blockn)
    )
}

EVERY(block) & AND(block)

ABNF

C

void
rulename(State *st)
{
    EVERY(
        AND(block1)
        AND(block2)
        ...
        AND(blockn)
    )
}

REPEAT(n, m, block)

n and m must be integers greater than or equal to 0, or the INF constant.

ABNF

C

void
rulename(State *st)
{
    /* rulename = n*m(block) */
    REPEAT(n, m,
        block
    )
    /* rulename = n*(block) */
    REPEAT(n, INF,
        block
    )
    /* rulename = *m(block) */
    REPEAT(0, m,
        block
    )
}

OPTIONAL(block)

ABNF

C

void
rulename(State *st)
{
    OPTIONAL(block)
}

RULENAME(rulename1)

ABNF

C

void
rulename(State *st)
{
    RULENAME(bit)
}

void
bit(State *st)
{
    /* bit = %x30-31 ;[0-1] */
    ASCIIRANGE(0x30, 0x31)
}

Examples

ABNF

date            =       day month year
year            =       4*DIGIT
month           =       FWS month-name FWS
month-name      =       "Jan" / "Feb" / "Mar" / "Apr" /
                       "May" / "Jun" / "Jul" / "Aug" /
                       "Sep" / "Oct" / "Nov" / "Dec"
day             =       1*2DIGIT

C

void
date(State *st)
{
    EVERY(
        AND(RULENAME(day))
        AND(RULENAME(month))
        AND(RULENAME(year))
    )
}

void
year(State *st)
{
    REPEAT(4, INF,
        RULENAME(digit)
    )
}

void
month(State *st)
{
    EVERY(
        AND(ASCIIRANGE(0x2f, 0x2f))
        AND(RULENAME(month_name))
        AND(ASCIIRANGE(0x2f, 0x2f))
    )
}

void
month_name(State *st)
{
    CHOOSE(
        OR(STR(Jan))
        OR(STR(Feb))
        /* ... */
        OR(STR(Dec))
    )
}

void
day(State *st)
{
    REPEAT(1, 2,
        RULENAME(digit)
    )
}