summaryrefslogtreecommitdiff
path: root/rfc822.c
diff options
context:
space:
mode:
Diffstat (limited to 'rfc822.c')
-rw-r--r--rfc822.c193
1 files changed, 193 insertions, 0 deletions
diff --git a/rfc822.c b/rfc822.c
new file mode 100644
index 0000000..5f1cac8
--- /dev/null
+++ b/rfc822.c
@@ -0,0 +1,193 @@
+/**
+ * \file rfc822.c
+ * Code for slicing and dicing RFC822 mail headers.
+ *
+ * How to parse RFC822 headers in C. This is not a fully conformant
+ * implementation of RFC822 or RFC2822, but it has been in production use in a
+ * widely-deployed MTA (fetcmail) since 1996 without complaints. Really
+ * perverse combinations of quoting and commenting could break it.
+ *
+ * \author Eric S. Raymond <esr@thyrsus.com>, 1997. This source code example
+ * is part of fetchmail and the Unix Cookbook, and are released under the MIT
+ * license.
+ */
+
+
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+#include <stdlib.h>
+
+
+#ifndef TRUE
+#define TRUE 1
+#define FALSE 0
+#endif
+
+#define HEADER_END(p) ((p)[0] == '\n' && ((p)[1] != ' ' && (p)[1] != '\t'))
+
+
+/**
+ * Parse addresses in succession out of a specified RFC822 header.
+ *
+ * \param hdr header to be parsed, NUL to continue previous \p hdr.
+ */
+char *next_address(const char *hdr)
+{
+ static unsigned char address[BUFSIZ];
+ static int tp;
+ static const unsigned char *hp;
+ static int state, oldstate;
+ int parendepth = 0;
+
+#define START_HDR 0 /* before header colon */
+#define SKIP_JUNK 1 /* skip whitespace, \n, and junk */
+#define BARE_ADDRESS 2 /* collecting address without delimiters */
+#define INSIDE_DQUOTE 3 /* inside double quotes */
+#define INSIDE_PARENS 4 /* inside parentheses */
+#define INSIDE_BRACKETS 5 /* inside bracketed address */
+#define ENDIT_ALL 6 /* after last address */
+
+#define NEXTTP() ((tp < sizeof(address)-1) ? tp++ : tp)
+
+ if (hdr)
+ {
+ hp = hdr;
+ state = START_HDR;
+ tp = 0;
+ }
+
+ for (; *hp; hp++)
+ {
+
+ if (state == ENDIT_ALL) /* after last address */
+ return(NULL);
+ else if (HEADER_END(hp))
+ {
+ state = ENDIT_ALL;
+ if (tp)
+ {
+ while (isspace(address[--tp]))
+ continue;
+ address[++tp] = '\0';
+ tp = 0;
+ return (address);
+ }
+ return((unsigned char *)NULL);
+ }
+ else if (*hp == '\\') /* handle RFC822 escaping */
+ {
+ if (state != INSIDE_PARENS)
+ {
+ address[NEXTTP()] = *hp++; /* take the escape */
+ address[NEXTTP()] = *hp; /* take following unsigned char */
+ }
+ }
+ else switch (state)
+ {
+ case START_HDR: /* before header colon */
+ if (*hp == ':')
+ state = SKIP_JUNK;
+ break;
+
+ case SKIP_JUNK: /* looking for address start */
+ if (*hp == '"') /* quoted string */
+ {
+ oldstate = SKIP_JUNK;
+ state = INSIDE_DQUOTE;
+ address[NEXTTP()] = *hp;
+ }
+ else if (*hp == '(') /* address comment -- ignore */
+ {
+ parendepth = 1;
+ oldstate = SKIP_JUNK;
+ state = INSIDE_PARENS;
+ }
+ else if (*hp == '<') /* begin <address> */
+ {
+ state = INSIDE_BRACKETS;
+ tp = 0;
+ }
+ else if (*hp != ',' && !isspace(*hp))
+ {
+ --hp;
+ state = BARE_ADDRESS;
+ }
+ break;
+
+ case BARE_ADDRESS: /* collecting address without delimiters */
+ if (*hp == ',') /* end of address */
+ {
+ if (tp)
+ {
+ address[NEXTTP()] = '\0';
+ state = SKIP_JUNK;
+ tp = 0;
+ return(address);
+ }
+ }
+ else if (*hp == '(') /* beginning of comment */
+ {
+ parendepth = 1;
+ oldstate = BARE_ADDRESS;
+ state = INSIDE_PARENS;
+ }
+ else if (*hp == '<') /* beginning of real address */
+ {
+ state = INSIDE_BRACKETS;
+ tp = 0;
+ }
+ else if (*hp == '"') /* quoted word, copy verbatim */
+ {
+ oldstate = state;
+ state = INSIDE_DQUOTE;
+ address[NEXTTP()] = *hp;
+ }
+ else if (!isspace(*hp)) /* just take it, ignoring whitespace */
+ address[NEXTTP()] = *hp;
+ break;
+
+ case INSIDE_DQUOTE: /* we're in a quoted string, copy verbatim */
+ if (*hp != '"')
+ address[NEXTTP()] = *hp;
+ else
+ {
+ address[NEXTTP()] = *hp;
+ state = oldstate;
+ }
+ break;
+
+ case INSIDE_PARENS: /* we're in a parenthesized comment, ignore */
+ if (*hp == '(')
+ ++parendepth;
+ else if (*hp == ')')
+ --parendepth;
+ if (parendepth == 0)
+ state = oldstate;
+ break;
+
+ case INSIDE_BRACKETS: /* possible <>-enclosed address */
+ if (*hp == '>') /* end of address */
+ {
+ address[NEXTTP()] = '\0';
+ state = SKIP_JUNK;
+ ++hp;
+ tp = 0;
+ return(address);
+ }
+ else if (*hp == '<') /* nested <> */
+ tp = 0;
+ else if (*hp == '"') /* quoted address */
+ {
+ address[NEXTTP()] = *hp;
+ oldstate = INSIDE_BRACKETS;
+ state = INSIDE_DQUOTE;
+ }
+ else /* just copy address */
+ address[NEXTTP()] = *hp;
+ break;
+ }
+ }
+
+ return(NULL);
+}