123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777 |
- %{
- #include "common.h"
- #include "smtp.h"
- #include <ctype.h>
- char *yylp; /* next character to be lex'd */
- int yydone; /* tell yylex to give up */
- char *yybuffer; /* first parsed character */
- char *yyend; /* end of buffer to be parsed */
- Node *root;
- Field *firstfield;
- Field *lastfield;
- Node *usender;
- Node *usys;
- Node *udate;
- char *startfield, *endfield;
- int originator;
- int destination;
- int date;
- int received;
- int messageid;
- %}
- %term WORD
- %term DATE
- %term RESENT_DATE
- %term RETURN_PATH
- %term FROM
- %term SENDER
- %term REPLY_TO
- %term RESENT_FROM
- %term RESENT_SENDER
- %term RESENT_REPLY_TO
- %term SUBJECT
- %term TO
- %term CC
- %term BCC
- %term RESENT_TO
- %term RESENT_CC
- %term RESENT_BCC
- %term REMOTE
- %term PRECEDENCE
- %term MIMEVERSION
- %term CONTENTTYPE
- %term MESSAGEID
- %term RECEIVED
- %term MAILER
- %term BADTOKEN
- %start msg
- %%
- msg : fields
- | unixfrom '\n' fields
- ;
- fields : '\n'
- { yydone = 1; }
- | field '\n'
- | field '\n' fields
- ;
- field : dates
- { date = 1; }
- | originator
- { originator = 1; }
- | destination
- { destination = 1; }
- | subject
- | optional
- | ignored
- | received
- | precedence
- | error '\n' field
- ;
- unixfrom : FROM route_addr unix_date_time REMOTE FROM word
- { freenode($1); freenode($4); freenode($5);
- usender = $2; udate = $3; usys = $6;
- }
- ;
- originator : REPLY_TO ':' address_list
- { newfield(link3($1, $2, $3), 1); }
- | RETURN_PATH ':' route_addr
- { newfield(link3($1, $2, $3), 1); }
- | FROM ':' mailbox_list
- { newfield(link3($1, $2, $3), 1); }
- | SENDER ':' mailbox
- { newfield(link3($1, $2, $3), 1); }
- | RESENT_REPLY_TO ':' address_list
- { newfield(link3($1, $2, $3), 1); }
- | RESENT_SENDER ':' mailbox
- { newfield(link3($1, $2, $3), 1); }
- | RESENT_FROM ':' mailbox
- { newfield(link3($1, $2, $3), 1); }
- ;
- dates : DATE ':' date_time
- { newfield(link3($1, $2, $3), 0); }
- | RESENT_DATE ':' date_time
- { newfield(link3($1, $2, $3), 0); }
- ;
- destination : TO ':'
- { newfield(link2($1, $2), 0); }
- | TO ':' address_list
- { newfield(link3($1, $2, $3), 0); }
- | RESENT_TO ':'
- { newfield(link2($1, $2), 0); }
- | RESENT_TO ':' address_list
- { newfield(link3($1, $2, $3), 0); }
- | CC ':'
- { newfield(link2($1, $2), 0); }
- | CC ':' address_list
- { newfield(link3($1, $2, $3), 0); }
- | RESENT_CC ':'
- { newfield(link2($1, $2), 0); }
- | RESENT_CC ':' address_list
- { newfield(link3($1, $2, $3), 0); }
- | BCC ':'
- { newfield(link2($1, $2), 0); }
- | BCC ':' address_list
- { newfield(link3($1, $2, $3), 0); }
- | RESENT_BCC ':'
- { newfield(link2($1, $2), 0); }
- | RESENT_BCC ':' address_list
- { newfield(link3($1, $2, $3), 0); }
- ;
- subject : SUBJECT ':' things
- { newfield(link3($1, $2, $3), 0); }
- | SUBJECT ':'
- { newfield(link2($1, $2), 0); }
- ;
- received : RECEIVED ':' things
- { newfield(link3($1, $2, $3), 0); received++; }
- | RECEIVED ':'
- { newfield(link2($1, $2), 0); received++; }
- ;
- precedence : PRECEDENCE ':' things
- { newfield(link3($1, $2, $3), 0); }
- | PRECEDENCE ':'
- { newfield(link2($1, $2), 0); }
- ;
- ignored : ignoredhdr ':' things
- { newfield(link3($1, $2, $3), 0); }
- | ignoredhdr ':'
- { newfield(link2($1, $2), 0); }
- ;
- ignoredhdr : MIMEVERSION | CONTENTTYPE | MESSAGEID { messageid = 1; } | MAILER
- ;
- optional : fieldwords ':' things
- { /* hack to allow same lex for field names and the rest */
- if(badfieldname($1)){
- freenode($1);
- freenode($2);
- freenode($3);
- return 1;
- }
- newfield(link3($1, $2, $3), 0);
- }
- | fieldwords ':'
- { /* hack to allow same lex for field names and the rest */
- if(badfieldname($1)){
- freenode($1);
- freenode($2);
- return 1;
- }
- newfield(link2($1, $2), 0);
- }
- ;
- address_list : address
- | address_list ',' address
- { $$ = link3($1, $2, $3); }
- ;
- address : mailbox
- | group
- ;
- group : phrase ':' address_list ';'
- { $$ = link2($1, link3($2, $3, $4)); }
- | phrase ':' ';'
- { $$ = link3($1, $2, $3); }
- ;
- mailbox_list : mailbox
- | mailbox_list ',' mailbox
- { $$ = link3($1, $2, $3); }
- ;
- mailbox : route_addr
- | phrase brak_addr
- { $$ = link2($1, $2); }
- | brak_addr
- ;
- brak_addr : '<' route_addr '>'
- { $$ = link3($1, $2, $3); }
- | '<' '>'
- { $$ = nobody($2); freenode($1); }
- ;
- route_addr : route ':' at_addr
- { $$ = address(concat($1, concat($2, $3))); }
- | addr_spec
- ;
- route : '@' domain
- { $$ = concat($1, $2); }
- | route ',' '@' domain
- { $$ = concat($1, concat($2, concat($3, $4))); }
- ;
- addr_spec : local_part
- { $$ = address($1); }
- | at_addr
- ;
- at_addr : local_part '@' domain
- { $$ = address(concat($1, concat($2, $3)));}
- | at_addr '@' domain
- { $$ = address(concat($1, concat($2, $3)));}
- ;
- local_part : word
- ;
- domain : word
- ;
- phrase : word
- | phrase word
- { $$ = link2($1, $2); }
- ;
- things : thing
- | things thing
- { $$ = link2($1, $2); }
- ;
- thing : word | '<' | '>' | '@' | ':' | ';' | ','
- ;
- date_time : things
- ;
- unix_date_time : word word word unix_time word word
- { $$ = link3($1, $3, link3($2, $6, link2($4, $5))); }
- ;
- unix_time : word
- | unix_time ':' word
- { $$ = link3($1, $2, $3); }
- ;
- word : WORD | DATE | RESENT_DATE | RETURN_PATH | FROM | SENDER
- | REPLY_TO | RESENT_FROM | RESENT_SENDER | RESENT_REPLY_TO
- | TO | CC | BCC | RESENT_TO | RESENT_CC | RESENT_BCC | REMOTE | SUBJECT
- | PRECEDENCE | MIMEVERSION | CONTENTTYPE | MESSAGEID | RECEIVED | MAILER
- ;
- fieldwords : fieldword
- | WORD
- | fieldwords fieldword
- { $$ = link2($1, $2); }
- | fieldwords word
- { $$ = link2($1, $2); }
- ;
- fieldword : '<' | '>' | '@' | ';' | ','
- ;
- %%
- /*
- * Initialize the parsing. Done once for each header field.
- */
- void
- yyinit(char *p, int len)
- {
- yybuffer = p;
- yylp = p;
- yyend = p + len;
- firstfield = lastfield = 0;
- received = 0;
- }
- /*
- * keywords identifying header fields we care about
- */
- typedef struct Keyword Keyword;
- struct Keyword {
- char *rep;
- int val;
- };
- /* field names that we need to recognize */
- Keyword key[] = {
- { "date", DATE },
- { "resent-date", RESENT_DATE },
- { "return_path", RETURN_PATH },
- { "from", FROM },
- { "sender", SENDER },
- { "reply-to", REPLY_TO },
- { "resent-from", RESENT_FROM },
- { "resent-sender", RESENT_SENDER },
- { "resent-reply-to", RESENT_REPLY_TO },
- { "to", TO },
- { "cc", CC },
- { "bcc", BCC },
- { "resent-to", RESENT_TO },
- { "resent-cc", RESENT_CC },
- { "resent-bcc", RESENT_BCC },
- { "remote", REMOTE },
- { "subject", SUBJECT },
- { "precedence", PRECEDENCE },
- { "mime-version", MIMEVERSION },
- { "content-type", CONTENTTYPE },
- { "message-id", MESSAGEID },
- { "received", RECEIVED },
- { "mailer", MAILER },
- { "who-the-hell-cares", WORD }
- };
- /*
- * Lexical analysis for an rfc822 header field. Continuation lines
- * are handled in yywhite() when skipping over white space.
- *
- */
- yylex(void)
- {
- String *t;
- int quoting;
- int escaping;
- char *start;
- Keyword *kp;
- int c, d;
- /* print("lexing\n"); /**/
- if(yylp >= yyend)
- return 0;
- if(yydone)
- return 0;
- quoting = escaping = 0;
- start = yylp;
- yylval = malloc(sizeof(Node));
- yylval->white = yylval->s = 0;
- yylval->next = 0;
- yylval->addr = 0;
- yylval->start = yylp;
- for(t = 0; yylp < yyend; yylp++){
- c = *yylp & 0xff;
- /* dump nulls, they can't be in header */
- if(c == 0)
- continue;
- if(escaping) {
- escaping = 0;
- } else if(quoting) {
- switch(c){
- case '\\':
- escaping = 1;
- break;
- case '\n':
- d = (*(yylp+1))&0xff;
- if(d != ' ' && d != '\t'){
- quoting = 0;
- yylp--;
- continue;
- }
- break;
- case '"':
- quoting = 0;
- break;
- }
- } else {
- switch(c){
- case '\\':
- escaping = 1;
- break;
- case '(':
- case ' ':
- case '\t':
- case '\r':
- goto out;
- case '\n':
- if(yylp == start){
- yylp++;
- /* print("lex(c %c)\n", c); /**/
- yylval->end = yylp;
- return yylval->c = c;
- }
- goto out;
- case '@':
- case '>':
- case '<':
- case ':':
- case ',':
- case ';':
- if(yylp == start){
- yylp++;
- yylval->white = yywhite();
- /* print("lex(c %c)\n", c); /**/
- yylval->end = yylp;
- return yylval->c = c;
- }
- goto out;
- case '"':
- quoting = 1;
- break;
- default:
- break;
- }
- }
- if(t == 0)
- t = s_new();
- s_putc(t, c);
- }
- out:
- yylval->white = yywhite();
- if(t) {
- s_terminate(t);
- } else /* message begins with white-space! */
- return yylval->c = '\n';
- yylval->s = t;
- for(kp = key; kp->val != WORD; kp++)
- if(cistrcmp(s_to_c(t), kp->rep)==0)
- break;
- /* print("lex(%d) %s\n", kp->val-WORD, s_to_c(t)); /**/
- yylval->end = yylp;
- return yylval->c = kp->val;
- }
- void
- yyerror(char *x)
- {
- USED(x);
- /*fprint(2, "parse err: %s\n", x);/**/
- }
- /*
- * parse white space and comments
- */
- String *
- yywhite(void)
- {
- String *w;
- int clevel;
- int c;
- int escaping;
- escaping = clevel = 0;
- for(w = 0; yylp < yyend; yylp++){
- c = *yylp & 0xff;
- /* dump nulls, they can't be in header */
- if(c == 0)
- continue;
- if(escaping){
- escaping = 0;
- } else if(clevel) {
- switch(c){
- case '\n':
- /*
- * look for multiline fields
- */
- if(*(yylp+1)==' ' || *(yylp+1)=='\t')
- break;
- else
- goto out;
- case '\\':
- escaping = 1;
- break;
- case '(':
- clevel++;
- break;
- case ')':
- clevel--;
- break;
- }
- } else {
- switch(c){
- case '\\':
- escaping = 1;
- break;
- case '(':
- clevel++;
- break;
- case ' ':
- case '\t':
- case '\r':
- break;
- case '\n':
- /*
- * look for multiline fields
- */
- if(*(yylp+1)==' ' || *(yylp+1)=='\t')
- break;
- else
- goto out;
- default:
- goto out;
- }
- }
- if(w == 0)
- w = s_new();
- s_putc(w, c);
- }
- out:
- if(w)
- s_terminate(w);
- return w;
- }
- /*
- * link two parsed entries together
- */
- Node*
- link2(Node *p1, Node *p2)
- {
- Node *p;
- for(p = p1; p->next; p = p->next)
- ;
- p->next = p2;
- return p1;
- }
- /*
- * link three parsed entries together
- */
- Node*
- link3(Node *p1, Node *p2, Node *p3)
- {
- Node *p;
- for(p = p2; p->next; p = p->next)
- ;
- p->next = p3;
- for(p = p1; p->next; p = p->next)
- ;
- p->next = p2;
- return p1;
- }
- /*
- * make a:b, move all white space after both
- */
- Node*
- colon(Node *p1, Node *p2)
- {
- if(p1->white){
- if(p2->white)
- s_append(p1->white, s_to_c(p2->white));
- } else {
- p1->white = p2->white;
- p2->white = 0;
- }
- s_append(p1->s, ":");
- if(p2->s)
- s_append(p1->s, s_to_c(p2->s));
- if(p1->end < p2->end)
- p1->end = p2->end;
- freenode(p2);
- return p1;
- }
- /*
- * concatenate two fields, move all white space after both
- */
- Node*
- concat(Node *p1, Node *p2)
- {
- char buf[2];
- if(p1->white){
- if(p2->white)
- s_append(p1->white, s_to_c(p2->white));
- } else {
- p1->white = p2->white;
- p2->white = 0;
- }
- if(p1->s == nil){
- buf[0] = p1->c;
- buf[1] = 0;
- p1->s = s_new();
- s_append(p1->s, buf);
- }
- if(p2->s)
- s_append(p1->s, s_to_c(p2->s));
- else {
- buf[0] = p2->c;
- buf[1] = 0;
- s_append(p1->s, buf);
- }
- if(p1->end < p2->end)
- p1->end = p2->end;
- freenode(p2);
- return p1;
- }
- /*
- * look for disallowed chars in the field name
- */
- int
- badfieldname(Node *p)
- {
- for(; p; p = p->next){
- /* field name can't contain white space */
- if(p->white && p->next)
- return 1;
- }
- return 0;
- }
- /*
- * mark as an address
- */
- Node *
- address(Node *p)
- {
- p->addr = 1;
- return p;
- }
- /*
- * case independent string compare
- */
- int
- cistrcmp(char *s1, char *s2)
- {
- int c1, c2;
- for(; *s1; s1++, s2++){
- c1 = isupper(*s1) ? tolower(*s1) : *s1;
- c2 = isupper(*s2) ? tolower(*s2) : *s2;
- if (c1 != c2)
- return -1;
- }
- return *s2;
- }
- /*
- * free a node
- */
- void
- freenode(Node *p)
- {
- Node *tp;
- while(p){
- tp = p->next;
- if(p->s)
- s_free(p->s);
- if(p->white)
- s_free(p->white);
- free(p);
- p = tp;
- }
- }
- /*
- * an anonymous user
- */
- Node*
- nobody(Node *p)
- {
- if(p->s)
- s_free(p->s);
- p->s = s_copy("pOsTmAsTeR");
- p->addr = 1;
- return p;
- }
- /*
- * add anything that was dropped because of a parse error
- */
- void
- missing(Node *p)
- {
- Node *np;
- char *start, *end;
- Field *f;
- String *s;
- start = yybuffer;
- if(lastfield != nil){
- for(np = lastfield->node; np; np = np->next)
- start = np->end+1;
- }
- end = p->start-1;
- if(end <= start)
- return;
- if(strncmp(start, "From ", 5) == 0)
- return;
- np = malloc(sizeof(Node));
- np->start = start;
- np->end = end;
- np->white = nil;
- s = s_copy("BadHeader: ");
- np->s = s_nappend(s, start, end-start);
- np->next = nil;
- f = malloc(sizeof(Field));
- f->next = 0;
- f->node = np;
- f->source = 0;
- if(firstfield)
- lastfield->next = f;
- else
- firstfield = f;
- lastfield = f;
- }
- /*
- * create a new field
- */
- void
- newfield(Node *p, int source)
- {
- Field *f;
- missing(p);
- f = malloc(sizeof(Field));
- f->next = 0;
- f->node = p;
- f->source = source;
- if(firstfield)
- lastfield->next = f;
- else
- firstfield = f;
- lastfield = f;
- endfield = startfield;
- startfield = yylp;
- }
- /*
- * fee a list of fields
- */
- void
- freefield(Field *f)
- {
- Field *tf;
- while(f){
- tf = f->next;
- freenode(f->node);
- free(f);
- f = tf;
- }
- }
- /*
- * add some white space to a node
- */
- Node*
- whiten(Node *p)
- {
- Node *tp;
- for(tp = p; tp->next; tp = tp->next)
- ;
- if(tp->white == 0)
- tp->white = s_copy(" ");
- return p;
- }
- void
- yycleanup(void)
- {
- Field *f, *fnext;
- Node *np, *next;
- for(f = firstfield; f; f = fnext){
- for(np = f->node; np; np = next){
- if(np->s)
- s_free(np->s);
- if(np->white)
- s_free(np->white);
- next = np->next;
- free(np);
- }
- fnext = f->next;
- free(f);
- }
- firstfield = lastfield = 0;
- }
|