rfc822.y 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777
  1. %{
  2. #include "common.h"
  3. #include "smtp.h"
  4. #include <ctype.h>
  5. char *yylp; /* next character to be lex'd */
  6. int yydone; /* tell yylex to give up */
  7. char *yybuffer; /* first parsed character */
  8. char *yyend; /* end of buffer to be parsed */
  9. Node *root;
  10. Field *firstfield;
  11. Field *lastfield;
  12. Node *usender;
  13. Node *usys;
  14. Node *udate;
  15. char *startfield, *endfield;
  16. int originator;
  17. int destination;
  18. int date;
  19. int received;
  20. int messageid;
  21. %}
  22. %term WORD
  23. %term DATE
  24. %term RESENT_DATE
  25. %term RETURN_PATH
  26. %term FROM
  27. %term SENDER
  28. %term REPLY_TO
  29. %term RESENT_FROM
  30. %term RESENT_SENDER
  31. %term RESENT_REPLY_TO
  32. %term SUBJECT
  33. %term TO
  34. %term CC
  35. %term BCC
  36. %term RESENT_TO
  37. %term RESENT_CC
  38. %term RESENT_BCC
  39. %term REMOTE
  40. %term PRECEDENCE
  41. %term MIMEVERSION
  42. %term CONTENTTYPE
  43. %term MESSAGEID
  44. %term RECEIVED
  45. %term MAILER
  46. %term BADTOKEN
  47. %start msg
  48. %%
  49. msg : fields
  50. | unixfrom '\n' fields
  51. ;
  52. fields : '\n'
  53. { yydone = 1; }
  54. | field '\n'
  55. | field '\n' fields
  56. ;
  57. field : dates
  58. { date = 1; }
  59. | originator
  60. { originator = 1; }
  61. | destination
  62. { destination = 1; }
  63. | subject
  64. | optional
  65. | ignored
  66. | received
  67. | precedence
  68. | error '\n' field
  69. ;
  70. unixfrom : FROM route_addr unix_date_time REMOTE FROM word
  71. { freenode($1); freenode($4); freenode($5);
  72. usender = $2; udate = $3; usys = $6;
  73. }
  74. ;
  75. originator : REPLY_TO ':' address_list
  76. { newfield(link3($1, $2, $3), 1); }
  77. | RETURN_PATH ':' route_addr
  78. { newfield(link3($1, $2, $3), 1); }
  79. | FROM ':' mailbox_list
  80. { newfield(link3($1, $2, $3), 1); }
  81. | SENDER ':' mailbox
  82. { newfield(link3($1, $2, $3), 1); }
  83. | RESENT_REPLY_TO ':' address_list
  84. { newfield(link3($1, $2, $3), 1); }
  85. | RESENT_SENDER ':' mailbox
  86. { newfield(link3($1, $2, $3), 1); }
  87. | RESENT_FROM ':' mailbox
  88. { newfield(link3($1, $2, $3), 1); }
  89. ;
  90. dates : DATE ':' date_time
  91. { newfield(link3($1, $2, $3), 0); }
  92. | RESENT_DATE ':' date_time
  93. { newfield(link3($1, $2, $3), 0); }
  94. ;
  95. destination : TO ':'
  96. { newfield(link2($1, $2), 0); }
  97. | TO ':' address_list
  98. { newfield(link3($1, $2, $3), 0); }
  99. | RESENT_TO ':'
  100. { newfield(link2($1, $2), 0); }
  101. | RESENT_TO ':' address_list
  102. { newfield(link3($1, $2, $3), 0); }
  103. | CC ':'
  104. { newfield(link2($1, $2), 0); }
  105. | CC ':' address_list
  106. { newfield(link3($1, $2, $3), 0); }
  107. | RESENT_CC ':'
  108. { newfield(link2($1, $2), 0); }
  109. | RESENT_CC ':' address_list
  110. { newfield(link3($1, $2, $3), 0); }
  111. | BCC ':'
  112. { newfield(link2($1, $2), 0); }
  113. | BCC ':' address_list
  114. { newfield(link3($1, $2, $3), 0); }
  115. | RESENT_BCC ':'
  116. { newfield(link2($1, $2), 0); }
  117. | RESENT_BCC ':' address_list
  118. { newfield(link3($1, $2, $3), 0); }
  119. ;
  120. subject : SUBJECT ':' things
  121. { newfield(link3($1, $2, $3), 0); }
  122. | SUBJECT ':'
  123. { newfield(link2($1, $2), 0); }
  124. ;
  125. received : RECEIVED ':' things
  126. { newfield(link3($1, $2, $3), 0); received++; }
  127. | RECEIVED ':'
  128. { newfield(link2($1, $2), 0); received++; }
  129. ;
  130. precedence : PRECEDENCE ':' things
  131. { newfield(link3($1, $2, $3), 0); }
  132. | PRECEDENCE ':'
  133. { newfield(link2($1, $2), 0); }
  134. ;
  135. ignored : ignoredhdr ':' things
  136. { newfield(link3($1, $2, $3), 0); }
  137. | ignoredhdr ':'
  138. { newfield(link2($1, $2), 0); }
  139. ;
  140. ignoredhdr : MIMEVERSION | CONTENTTYPE | MESSAGEID { messageid = 1; } | MAILER
  141. ;
  142. optional : fieldwords ':' things
  143. { /* hack to allow same lex for field names and the rest */
  144. if(badfieldname($1)){
  145. freenode($1);
  146. freenode($2);
  147. freenode($3);
  148. return 1;
  149. }
  150. newfield(link3($1, $2, $3), 0);
  151. }
  152. | fieldwords ':'
  153. { /* hack to allow same lex for field names and the rest */
  154. if(badfieldname($1)){
  155. freenode($1);
  156. freenode($2);
  157. return 1;
  158. }
  159. newfield(link2($1, $2), 0);
  160. }
  161. ;
  162. address_list : address
  163. | address_list ',' address
  164. { $$ = link3($1, $2, $3); }
  165. ;
  166. address : mailbox
  167. | group
  168. ;
  169. group : phrase ':' address_list ';'
  170. { $$ = link2($1, link3($2, $3, $4)); }
  171. | phrase ':' ';'
  172. { $$ = link3($1, $2, $3); }
  173. ;
  174. mailbox_list : mailbox
  175. | mailbox_list ',' mailbox
  176. { $$ = link3($1, $2, $3); }
  177. ;
  178. mailbox : route_addr
  179. | phrase brak_addr
  180. { $$ = link2($1, $2); }
  181. | brak_addr
  182. ;
  183. brak_addr : '<' route_addr '>'
  184. { $$ = link3($1, $2, $3); }
  185. | '<' '>'
  186. { $$ = nobody($2); freenode($1); }
  187. ;
  188. route_addr : route ':' at_addr
  189. { $$ = address(concat($1, concat($2, $3))); }
  190. | addr_spec
  191. ;
  192. route : '@' domain
  193. { $$ = concat($1, $2); }
  194. | route ',' '@' domain
  195. { $$ = concat($1, concat($2, concat($3, $4))); }
  196. ;
  197. addr_spec : local_part
  198. { $$ = address($1); }
  199. | at_addr
  200. ;
  201. at_addr : local_part '@' domain
  202. { $$ = address(concat($1, concat($2, $3)));}
  203. | at_addr '@' domain
  204. { $$ = address(concat($1, concat($2, $3)));}
  205. ;
  206. local_part : word
  207. ;
  208. domain : word
  209. ;
  210. phrase : word
  211. | phrase word
  212. { $$ = link2($1, $2); }
  213. ;
  214. things : thing
  215. | things thing
  216. { $$ = link2($1, $2); }
  217. ;
  218. thing : word | '<' | '>' | '@' | ':' | ';' | ','
  219. ;
  220. date_time : things
  221. ;
  222. unix_date_time : word word word unix_time word word
  223. { $$ = link3($1, $3, link3($2, $6, link2($4, $5))); }
  224. ;
  225. unix_time : word
  226. | unix_time ':' word
  227. { $$ = link3($1, $2, $3); }
  228. ;
  229. word : WORD | DATE | RESENT_DATE | RETURN_PATH | FROM | SENDER
  230. | REPLY_TO | RESENT_FROM | RESENT_SENDER | RESENT_REPLY_TO
  231. | TO | CC | BCC | RESENT_TO | RESENT_CC | RESENT_BCC | REMOTE | SUBJECT
  232. | PRECEDENCE | MIMEVERSION | CONTENTTYPE | MESSAGEID | RECEIVED | MAILER
  233. ;
  234. fieldwords : fieldword
  235. | WORD
  236. | fieldwords fieldword
  237. { $$ = link2($1, $2); }
  238. | fieldwords word
  239. { $$ = link2($1, $2); }
  240. ;
  241. fieldword : '<' | '>' | '@' | ';' | ','
  242. ;
  243. %%
  244. /*
  245. * Initialize the parsing. Done once for each header field.
  246. */
  247. void
  248. yyinit(char *p, int len)
  249. {
  250. yybuffer = p;
  251. yylp = p;
  252. yyend = p + len;
  253. firstfield = lastfield = 0;
  254. received = 0;
  255. }
  256. /*
  257. * keywords identifying header fields we care about
  258. */
  259. typedef struct Keyword Keyword;
  260. struct Keyword {
  261. char *rep;
  262. int val;
  263. };
  264. /* field names that we need to recognize */
  265. Keyword key[] = {
  266. { "date", DATE },
  267. { "resent-date", RESENT_DATE },
  268. { "return_path", RETURN_PATH },
  269. { "from", FROM },
  270. { "sender", SENDER },
  271. { "reply-to", REPLY_TO },
  272. { "resent-from", RESENT_FROM },
  273. { "resent-sender", RESENT_SENDER },
  274. { "resent-reply-to", RESENT_REPLY_TO },
  275. { "to", TO },
  276. { "cc", CC },
  277. { "bcc", BCC },
  278. { "resent-to", RESENT_TO },
  279. { "resent-cc", RESENT_CC },
  280. { "resent-bcc", RESENT_BCC },
  281. { "remote", REMOTE },
  282. { "subject", SUBJECT },
  283. { "precedence", PRECEDENCE },
  284. { "mime-version", MIMEVERSION },
  285. { "content-type", CONTENTTYPE },
  286. { "message-id", MESSAGEID },
  287. { "received", RECEIVED },
  288. { "mailer", MAILER },
  289. { "who-the-hell-cares", WORD }
  290. };
  291. /*
  292. * Lexical analysis for an rfc822 header field. Continuation lines
  293. * are handled in yywhite() when skipping over white space.
  294. *
  295. */
  296. yylex(void)
  297. {
  298. String *t;
  299. int quoting;
  300. int escaping;
  301. char *start;
  302. Keyword *kp;
  303. int c, d;
  304. /* print("lexing\n"); /**/
  305. if(yylp >= yyend)
  306. return 0;
  307. if(yydone)
  308. return 0;
  309. quoting = escaping = 0;
  310. start = yylp;
  311. yylval = malloc(sizeof(Node));
  312. yylval->white = yylval->s = 0;
  313. yylval->next = 0;
  314. yylval->addr = 0;
  315. yylval->start = yylp;
  316. for(t = 0; yylp < yyend; yylp++){
  317. c = *yylp & 0xff;
  318. /* dump nulls, they can't be in header */
  319. if(c == 0)
  320. continue;
  321. if(escaping) {
  322. escaping = 0;
  323. } else if(quoting) {
  324. switch(c){
  325. case '\\':
  326. escaping = 1;
  327. break;
  328. case '\n':
  329. d = (*(yylp+1))&0xff;
  330. if(d != ' ' && d != '\t'){
  331. quoting = 0;
  332. yylp--;
  333. continue;
  334. }
  335. break;
  336. case '"':
  337. quoting = 0;
  338. break;
  339. }
  340. } else {
  341. switch(c){
  342. case '\\':
  343. escaping = 1;
  344. break;
  345. case '(':
  346. case ' ':
  347. case '\t':
  348. case '\r':
  349. goto out;
  350. case '\n':
  351. if(yylp == start){
  352. yylp++;
  353. /* print("lex(c %c)\n", c); /**/
  354. yylval->end = yylp;
  355. return yylval->c = c;
  356. }
  357. goto out;
  358. case '@':
  359. case '>':
  360. case '<':
  361. case ':':
  362. case ',':
  363. case ';':
  364. if(yylp == start){
  365. yylp++;
  366. yylval->white = yywhite();
  367. /* print("lex(c %c)\n", c); /**/
  368. yylval->end = yylp;
  369. return yylval->c = c;
  370. }
  371. goto out;
  372. case '"':
  373. quoting = 1;
  374. break;
  375. default:
  376. break;
  377. }
  378. }
  379. if(t == 0)
  380. t = s_new();
  381. s_putc(t, c);
  382. }
  383. out:
  384. yylval->white = yywhite();
  385. if(t) {
  386. s_terminate(t);
  387. } else /* message begins with white-space! */
  388. return yylval->c = '\n';
  389. yylval->s = t;
  390. for(kp = key; kp->val != WORD; kp++)
  391. if(cistrcmp(s_to_c(t), kp->rep)==0)
  392. break;
  393. /* print("lex(%d) %s\n", kp->val-WORD, s_to_c(t)); /**/
  394. yylval->end = yylp;
  395. return yylval->c = kp->val;
  396. }
  397. void
  398. yyerror(char *x)
  399. {
  400. USED(x);
  401. /*fprint(2, "parse err: %s\n", x);/**/
  402. }
  403. /*
  404. * parse white space and comments
  405. */
  406. String *
  407. yywhite(void)
  408. {
  409. String *w;
  410. int clevel;
  411. int c;
  412. int escaping;
  413. escaping = clevel = 0;
  414. for(w = 0; yylp < yyend; yylp++){
  415. c = *yylp & 0xff;
  416. /* dump nulls, they can't be in header */
  417. if(c == 0)
  418. continue;
  419. if(escaping){
  420. escaping = 0;
  421. } else if(clevel) {
  422. switch(c){
  423. case '\n':
  424. /*
  425. * look for multiline fields
  426. */
  427. if(*(yylp+1)==' ' || *(yylp+1)=='\t')
  428. break;
  429. else
  430. goto out;
  431. case '\\':
  432. escaping = 1;
  433. break;
  434. case '(':
  435. clevel++;
  436. break;
  437. case ')':
  438. clevel--;
  439. break;
  440. }
  441. } else {
  442. switch(c){
  443. case '\\':
  444. escaping = 1;
  445. break;
  446. case '(':
  447. clevel++;
  448. break;
  449. case ' ':
  450. case '\t':
  451. case '\r':
  452. break;
  453. case '\n':
  454. /*
  455. * look for multiline fields
  456. */
  457. if(*(yylp+1)==' ' || *(yylp+1)=='\t')
  458. break;
  459. else
  460. goto out;
  461. default:
  462. goto out;
  463. }
  464. }
  465. if(w == 0)
  466. w = s_new();
  467. s_putc(w, c);
  468. }
  469. out:
  470. if(w)
  471. s_terminate(w);
  472. return w;
  473. }
  474. /*
  475. * link two parsed entries together
  476. */
  477. Node*
  478. link2(Node *p1, Node *p2)
  479. {
  480. Node *p;
  481. for(p = p1; p->next; p = p->next)
  482. ;
  483. p->next = p2;
  484. return p1;
  485. }
  486. /*
  487. * link three parsed entries together
  488. */
  489. Node*
  490. link3(Node *p1, Node *p2, Node *p3)
  491. {
  492. Node *p;
  493. for(p = p2; p->next; p = p->next)
  494. ;
  495. p->next = p3;
  496. for(p = p1; p->next; p = p->next)
  497. ;
  498. p->next = p2;
  499. return p1;
  500. }
  501. /*
  502. * make a:b, move all white space after both
  503. */
  504. Node*
  505. colon(Node *p1, Node *p2)
  506. {
  507. if(p1->white){
  508. if(p2->white)
  509. s_append(p1->white, s_to_c(p2->white));
  510. } else {
  511. p1->white = p2->white;
  512. p2->white = 0;
  513. }
  514. s_append(p1->s, ":");
  515. if(p2->s)
  516. s_append(p1->s, s_to_c(p2->s));
  517. if(p1->end < p2->end)
  518. p1->end = p2->end;
  519. freenode(p2);
  520. return p1;
  521. }
  522. /*
  523. * concatenate two fields, move all white space after both
  524. */
  525. Node*
  526. concat(Node *p1, Node *p2)
  527. {
  528. char buf[2];
  529. if(p1->white){
  530. if(p2->white)
  531. s_append(p1->white, s_to_c(p2->white));
  532. } else {
  533. p1->white = p2->white;
  534. p2->white = 0;
  535. }
  536. if(p1->s == nil){
  537. buf[0] = p1->c;
  538. buf[1] = 0;
  539. p1->s = s_new();
  540. s_append(p1->s, buf);
  541. }
  542. if(p2->s)
  543. s_append(p1->s, s_to_c(p2->s));
  544. else {
  545. buf[0] = p2->c;
  546. buf[1] = 0;
  547. s_append(p1->s, buf);
  548. }
  549. if(p1->end < p2->end)
  550. p1->end = p2->end;
  551. freenode(p2);
  552. return p1;
  553. }
  554. /*
  555. * look for disallowed chars in the field name
  556. */
  557. int
  558. badfieldname(Node *p)
  559. {
  560. for(; p; p = p->next){
  561. /* field name can't contain white space */
  562. if(p->white && p->next)
  563. return 1;
  564. }
  565. return 0;
  566. }
  567. /*
  568. * mark as an address
  569. */
  570. Node *
  571. address(Node *p)
  572. {
  573. p->addr = 1;
  574. return p;
  575. }
  576. /*
  577. * case independent string compare
  578. */
  579. int
  580. cistrcmp(char *s1, char *s2)
  581. {
  582. int c1, c2;
  583. for(; *s1; s1++, s2++){
  584. c1 = isupper(*s1) ? tolower(*s1) : *s1;
  585. c2 = isupper(*s2) ? tolower(*s2) : *s2;
  586. if (c1 != c2)
  587. return -1;
  588. }
  589. return *s2;
  590. }
  591. /*
  592. * free a node
  593. */
  594. void
  595. freenode(Node *p)
  596. {
  597. Node *tp;
  598. while(p){
  599. tp = p->next;
  600. if(p->s)
  601. s_free(p->s);
  602. if(p->white)
  603. s_free(p->white);
  604. free(p);
  605. p = tp;
  606. }
  607. }
  608. /*
  609. * an anonymous user
  610. */
  611. Node*
  612. nobody(Node *p)
  613. {
  614. if(p->s)
  615. s_free(p->s);
  616. p->s = s_copy("pOsTmAsTeR");
  617. p->addr = 1;
  618. return p;
  619. }
  620. /*
  621. * add anything that was dropped because of a parse error
  622. */
  623. void
  624. missing(Node *p)
  625. {
  626. Node *np;
  627. char *start, *end;
  628. Field *f;
  629. String *s;
  630. start = yybuffer;
  631. if(lastfield != nil){
  632. for(np = lastfield->node; np; np = np->next)
  633. start = np->end+1;
  634. }
  635. end = p->start-1;
  636. if(end <= start)
  637. return;
  638. if(strncmp(start, "From ", 5) == 0)
  639. return;
  640. np = malloc(sizeof(Node));
  641. np->start = start;
  642. np->end = end;
  643. np->white = nil;
  644. s = s_copy("BadHeader: ");
  645. np->s = s_nappend(s, start, end-start);
  646. np->next = nil;
  647. f = malloc(sizeof(Field));
  648. f->next = 0;
  649. f->node = np;
  650. f->source = 0;
  651. if(firstfield)
  652. lastfield->next = f;
  653. else
  654. firstfield = f;
  655. lastfield = f;
  656. }
  657. /*
  658. * create a new field
  659. */
  660. void
  661. newfield(Node *p, int source)
  662. {
  663. Field *f;
  664. missing(p);
  665. f = malloc(sizeof(Field));
  666. f->next = 0;
  667. f->node = p;
  668. f->source = source;
  669. if(firstfield)
  670. lastfield->next = f;
  671. else
  672. firstfield = f;
  673. lastfield = f;
  674. endfield = startfield;
  675. startfield = yylp;
  676. }
  677. /*
  678. * fee a list of fields
  679. */
  680. void
  681. freefield(Field *f)
  682. {
  683. Field *tf;
  684. while(f){
  685. tf = f->next;
  686. freenode(f->node);
  687. free(f);
  688. f = tf;
  689. }
  690. }
  691. /*
  692. * add some white space to a node
  693. */
  694. Node*
  695. whiten(Node *p)
  696. {
  697. Node *tp;
  698. for(tp = p; tp->next; tp = tp->next)
  699. ;
  700. if(tp->white == 0)
  701. tp->white = s_copy(" ");
  702. return p;
  703. }
  704. void
  705. yycleanup(void)
  706. {
  707. Field *f, *fnext;
  708. Node *np, *next;
  709. for(f = firstfield; f; f = fnext){
  710. for(np = f->node; np; np = next){
  711. if(np->s)
  712. s_free(np->s);
  713. if(np->white)
  714. s_free(np->white);
  715. next = np->next;
  716. free(np);
  717. }
  718. fnext = f->next;
  719. free(f);
  720. }
  721. firstfield = lastfield = 0;
  722. }