From 14c9366aac697e87499c5748b36fa7bf5e6cd320 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Fri, 22 Aug 2008 09:17:14 -0400 Subject: include external libs in a subdir to make install easier darcs-hash:20080822131714-84dde-6978424ded2ed1041a65142a25560654ac717fcd.gz --- extlib/Mail/RFC822.php | 940 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 940 insertions(+) create mode 100644 extlib/Mail/RFC822.php (limited to 'extlib/Mail/RFC822.php') diff --git a/extlib/Mail/RFC822.php b/extlib/Mail/RFC822.php new file mode 100644 index 000000000..8714df2e2 --- /dev/null +++ b/extlib/Mail/RFC822.php @@ -0,0 +1,940 @@ + | +// | Chuck Hagenbuch | +// +-----------------------------------------------------------------------+ + +/** + * RFC 822 Email address list validation Utility + * + * What is it? + * + * This class will take an address string, and parse it into it's consituent + * parts, be that either addresses, groups, or combinations. Nested groups + * are not supported. The structure it returns is pretty straight forward, + * and is similar to that provided by the imap_rfc822_parse_adrlist(). Use + * print_r() to view the structure. + * + * How do I use it? + * + * $address_string = 'My Group: "Richard" (A comment), ted@example.com (Ted Bloggs), Barney;'; + * $structure = Mail_RFC822::parseAddressList($address_string, 'example.com', true) + * print_r($structure); + * + * @author Richard Heyes + * @author Chuck Hagenbuch + * @version $Revision: 1.24 $ + * @license BSD + * @package Mail + */ +class Mail_RFC822 { + + /** + * The address being parsed by the RFC822 object. + * @var string $address + */ + var $address = ''; + + /** + * The default domain to use for unqualified addresses. + * @var string $default_domain + */ + var $default_domain = 'localhost'; + + /** + * Should we return a nested array showing groups, or flatten everything? + * @var boolean $nestGroups + */ + var $nestGroups = true; + + /** + * Whether or not to validate atoms for non-ascii characters. + * @var boolean $validate + */ + var $validate = true; + + /** + * The array of raw addresses built up as we parse. + * @var array $addresses + */ + var $addresses = array(); + + /** + * The final array of parsed address information that we build up. + * @var array $structure + */ + var $structure = array(); + + /** + * The current error message, if any. + * @var string $error + */ + var $error = null; + + /** + * An internal counter/pointer. + * @var integer $index + */ + var $index = null; + + /** + * The number of groups that have been found in the address list. + * @var integer $num_groups + * @access public + */ + var $num_groups = 0; + + /** + * A variable so that we can tell whether or not we're inside a + * Mail_RFC822 object. + * @var boolean $mailRFC822 + */ + var $mailRFC822 = true; + + /** + * A limit after which processing stops + * @var int $limit + */ + var $limit = null; + + /** + * Sets up the object. The address must either be set here or when + * calling parseAddressList(). One or the other. + * + * @access public + * @param string $address The address(es) to validate. + * @param string $default_domain Default domain/host etc. If not supplied, will be set to localhost. + * @param boolean $nest_groups Whether to return the structure with groups nested for easier viewing. + * @param boolean $validate Whether to validate atoms. Turn this off if you need to run addresses through before encoding the personal names, for instance. + * + * @return object Mail_RFC822 A new Mail_RFC822 object. + */ + function Mail_RFC822($address = null, $default_domain = null, $nest_groups = null, $validate = null, $limit = null) + { + if (isset($address)) $this->address = $address; + if (isset($default_domain)) $this->default_domain = $default_domain; + if (isset($nest_groups)) $this->nestGroups = $nest_groups; + if (isset($validate)) $this->validate = $validate; + if (isset($limit)) $this->limit = $limit; + } + + /** + * Starts the whole process. The address must either be set here + * or when creating the object. One or the other. + * + * @access public + * @param string $address The address(es) to validate. + * @param string $default_domain Default domain/host etc. + * @param boolean $nest_groups Whether to return the structure with groups nested for easier viewing. + * @param boolean $validate Whether to validate atoms. Turn this off if you need to run addresses through before encoding the personal names, for instance. + * + * @return array A structured array of addresses. + */ + function parseAddressList($address = null, $default_domain = null, $nest_groups = null, $validate = null, $limit = null) + { + if (!isset($this) || !isset($this->mailRFC822)) { + $obj = new Mail_RFC822($address, $default_domain, $nest_groups, $validate, $limit); + return $obj->parseAddressList(); + } + + if (isset($address)) $this->address = $address; + if (isset($default_domain)) $this->default_domain = $default_domain; + if (isset($nest_groups)) $this->nestGroups = $nest_groups; + if (isset($validate)) $this->validate = $validate; + if (isset($limit)) $this->limit = $limit; + + $this->structure = array(); + $this->addresses = array(); + $this->error = null; + $this->index = null; + + // Unfold any long lines in $this->address. + $this->address = preg_replace('/\r?\n/', "\r\n", $this->address); + $this->address = preg_replace('/\r\n(\t| )+/', ' ', $this->address); + + while ($this->address = $this->_splitAddresses($this->address)); + + if ($this->address === false || isset($this->error)) { + require_once 'PEAR.php'; + return PEAR::raiseError($this->error); + } + + // Validate each address individually. If we encounter an invalid + // address, stop iterating and return an error immediately. + foreach ($this->addresses as $address) { + $valid = $this->_validateAddress($address); + + if ($valid === false || isset($this->error)) { + require_once 'PEAR.php'; + return PEAR::raiseError($this->error); + } + + if (!$this->nestGroups) { + $this->structure = array_merge($this->structure, $valid); + } else { + $this->structure[] = $valid; + } + } + + return $this->structure; + } + + /** + * Splits an address into separate addresses. + * + * @access private + * @param string $address The addresses to split. + * @return boolean Success or failure. + */ + function _splitAddresses($address) + { + if (!empty($this->limit) && count($this->addresses) == $this->limit) { + return ''; + } + + if ($this->_isGroup($address) && !isset($this->error)) { + $split_char = ';'; + $is_group = true; + } elseif (!isset($this->error)) { + $split_char = ','; + $is_group = false; + } elseif (isset($this->error)) { + return false; + } + + // Split the string based on the above ten or so lines. + $parts = explode($split_char, $address); + $string = $this->_splitCheck($parts, $split_char); + + // If a group... + if ($is_group) { + // If $string does not contain a colon outside of + // brackets/quotes etc then something's fubar. + + // First check there's a colon at all: + if (strpos($string, ':') === false) { + $this->error = 'Invalid address: ' . $string; + return false; + } + + // Now check it's outside of brackets/quotes: + if (!$this->_splitCheck(explode(':', $string), ':')) { + return false; + } + + // We must have a group at this point, so increase the counter: + $this->num_groups++; + } + + // $string now contains the first full address/group. + // Add to the addresses array. + $this->addresses[] = array( + 'address' => trim($string), + 'group' => $is_group + ); + + // Remove the now stored address from the initial line, the +1 + // is to account for the explode character. + $address = trim(substr($address, strlen($string) + 1)); + + // If the next char is a comma and this was a group, then + // there are more addresses, otherwise, if there are any more + // chars, then there is another address. + if ($is_group && substr($address, 0, 1) == ','){ + $address = trim(substr($address, 1)); + return $address; + + } elseif (strlen($address) > 0) { + return $address; + + } else { + return ''; + } + + // If you got here then something's off + return false; + } + + /** + * Checks for a group at the start of the string. + * + * @access private + * @param string $address The address to check. + * @return boolean Whether or not there is a group at the start of the string. + */ + function _isGroup($address) + { + // First comma not in quotes, angles or escaped: + $parts = explode(',', $address); + $string = $this->_splitCheck($parts, ','); + + // Now we have the first address, we can reliably check for a + // group by searching for a colon that's not escaped or in + // quotes or angle brackets. + if (count($parts = explode(':', $string)) > 1) { + $string2 = $this->_splitCheck($parts, ':'); + return ($string2 !== $string); + } else { + return false; + } + } + + /** + * A common function that will check an exploded string. + * + * @access private + * @param array $parts The exloded string. + * @param string $char The char that was exploded on. + * @return mixed False if the string contains unclosed quotes/brackets, or the string on success. + */ + function _splitCheck($parts, $char) + { + $string = $parts[0]; + + for ($i = 0; $i < count($parts); $i++) { + if ($this->_hasUnclosedQuotes($string) + || $this->_hasUnclosedBrackets($string, '<>') + || $this->_hasUnclosedBrackets($string, '[]') + || $this->_hasUnclosedBrackets($string, '()') + || substr($string, -1) == '\\') { + if (isset($parts[$i + 1])) { + $string = $string . $char . $parts[$i + 1]; + } else { + $this->error = 'Invalid address spec. Unclosed bracket or quotes'; + return false; + } + } else { + $this->index = $i; + break; + } + } + + return $string; + } + + /** + * Checks if a string has unclosed quotes or not. + * + * @access private + * @param string $string The string to check. + * @return boolean True if there are unclosed quotes inside the string, + * false otherwise. + */ + function _hasUnclosedQuotes($string) + { + $string = trim($string); + $iMax = strlen($string); + $in_quote = false; + $i = $slashes = 0; + + for (; $i < $iMax; ++$i) { + switch ($string[$i]) { + case '\\': + ++$slashes; + break; + + case '"': + if ($slashes % 2 == 0) { + $in_quote = !$in_quote; + } + // Fall through to default action below. + + default: + $slashes = 0; + break; + } + } + + return $in_quote; + } + + /** + * Checks if a string has an unclosed brackets or not. IMPORTANT: + * This function handles both angle brackets and square brackets; + * + * @access private + * @param string $string The string to check. + * @param string $chars The characters to check for. + * @return boolean True if there are unclosed brackets inside the string, false otherwise. + */ + function _hasUnclosedBrackets($string, $chars) + { + $num_angle_start = substr_count($string, $chars[0]); + $num_angle_end = substr_count($string, $chars[1]); + + $this->_hasUnclosedBracketsSub($string, $num_angle_start, $chars[0]); + $this->_hasUnclosedBracketsSub($string, $num_angle_end, $chars[1]); + + if ($num_angle_start < $num_angle_end) { + $this->error = 'Invalid address spec. Unmatched quote or bracket (' . $chars . ')'; + return false; + } else { + return ($num_angle_start > $num_angle_end); + } + } + + /** + * Sub function that is used only by hasUnclosedBrackets(). + * + * @access private + * @param string $string The string to check. + * @param integer &$num The number of occurences. + * @param string $char The character to count. + * @return integer The number of occurences of $char in $string, adjusted for backslashes. + */ + function _hasUnclosedBracketsSub($string, &$num, $char) + { + $parts = explode($char, $string); + for ($i = 0; $i < count($parts); $i++){ + if (substr($parts[$i], -1) == '\\' || $this->_hasUnclosedQuotes($parts[$i])) + $num--; + if (isset($parts[$i + 1])) + $parts[$i + 1] = $parts[$i] . $char . $parts[$i + 1]; + } + + return $num; + } + + /** + * Function to begin checking the address. + * + * @access private + * @param string $address The address to validate. + * @return mixed False on failure, or a structured array of address information on success. + */ + function _validateAddress($address) + { + $is_group = false; + $addresses = array(); + + if ($address['group']) { + $is_group = true; + + // Get the group part of the name + $parts = explode(':', $address['address']); + $groupname = $this->_splitCheck($parts, ':'); + $structure = array(); + + // And validate the group part of the name. + if (!$this->_validatePhrase($groupname)){ + $this->error = 'Group name did not validate.'; + return false; + } else { + // Don't include groups if we are not nesting + // them. This avoids returning invalid addresses. + if ($this->nestGroups) { + $structure = new stdClass; + $structure->groupname = $groupname; + } + } + + $address['address'] = ltrim(substr($address['address'], strlen($groupname . ':'))); + } + + // If a group then split on comma and put into an array. + // Otherwise, Just put the whole address in an array. + if ($is_group) { + while (strlen($address['address']) > 0) { + $parts = explode(',', $address['address']); + $addresses[] = $this->_splitCheck($parts, ','); + $address['address'] = trim(substr($address['address'], strlen(end($addresses) . ','))); + } + } else { + $addresses[] = $address['address']; + } + + // Check that $addresses is set, if address like this: + // Groupname:; + // Then errors were appearing. + if (!count($addresses)){ + $this->error = 'Empty group.'; + return false; + } + + // Trim the whitespace from all of the address strings. + array_map('trim', $addresses); + + // Validate each mailbox. + // Format could be one of: name + // geezer@domain.com + // geezer + // ... or any other format valid by RFC 822. + for ($i = 0; $i < count($addresses); $i++) { + if (!$this->validateMailbox($addresses[$i])) { + if (empty($this->error)) { + $this->error = 'Validation failed for: ' . $addresses[$i]; + } + return false; + } + } + + // Nested format + if ($this->nestGroups) { + if ($is_group) { + $structure->addresses = $addresses; + } else { + $structure = $addresses[0]; + } + + // Flat format + } else { + if ($is_group) { + $structure = array_merge($structure, $addresses); + } else { + $structure = $addresses; + } + } + + return $structure; + } + + /** + * Function to validate a phrase. + * + * @access private + * @param string $phrase The phrase to check. + * @return boolean Success or failure. + */ + function _validatePhrase($phrase) + { + // Splits on one or more Tab or space. + $parts = preg_split('/[ \\x09]+/', $phrase, -1, PREG_SPLIT_NO_EMPTY); + + $phrase_parts = array(); + while (count($parts) > 0){ + $phrase_parts[] = $this->_splitCheck($parts, ' '); + for ($i = 0; $i < $this->index + 1; $i++) + array_shift($parts); + } + + foreach ($phrase_parts as $part) { + // If quoted string: + if (substr($part, 0, 1) == '"') { + if (!$this->_validateQuotedString($part)) { + return false; + } + continue; + } + + // Otherwise it's an atom: + if (!$this->_validateAtom($part)) return false; + } + + return true; + } + + /** + * Function to validate an atom which from rfc822 is: + * atom = 1* + * + * If validation ($this->validate) has been turned off, then + * validateAtom() doesn't actually check anything. This is so that you + * can split a list of addresses up before encoding personal names + * (umlauts, etc.), for example. + * + * @access private + * @param string $atom The string to check. + * @return boolean Success or failure. + */ + function _validateAtom($atom) + { + if (!$this->validate) { + // Validation has been turned off; assume the atom is okay. + return true; + } + + // Check for any char from ASCII 0 - ASCII 127 + if (!preg_match('/^[\\x00-\\x7E]+$/i', $atom, $matches)) { + return false; + } + + // Check for specials: + if (preg_match('/[][()<>@,;\\:". ]/', $atom)) { + return false; + } + + // Check for control characters (ASCII 0-31): + if (preg_match('/[\\x00-\\x1F]+/', $atom)) { + return false; + } + + return true; + } + + /** + * Function to validate quoted string, which is: + * quoted-string = <"> *(qtext/quoted-pair) <"> + * + * @access private + * @param string $qstring The string to check + * @return boolean Success or failure. + */ + function _validateQuotedString($qstring) + { + // Leading and trailing " + $qstring = substr($qstring, 1, -1); + + // Perform check, removing quoted characters first. + return !preg_match('/[\x0D\\\\"]/', preg_replace('/\\\\./', '', $qstring)); + } + + /** + * Function to validate a mailbox, which is: + * mailbox = addr-spec ; simple address + * / phrase route-addr ; name and route-addr + * + * @access public + * @param string &$mailbox The string to check. + * @return boolean Success or failure. + */ + function validateMailbox(&$mailbox) + { + // A couple of defaults. + $phrase = ''; + $comment = ''; + $comments = array(); + + // Catch any RFC822 comments and store them separately. + $_mailbox = $mailbox; + while (strlen(trim($_mailbox)) > 0) { + $parts = explode('(', $_mailbox); + $before_comment = $this->_splitCheck($parts, '('); + if ($before_comment != $_mailbox) { + // First char should be a (. + $comment = substr(str_replace($before_comment, '', $_mailbox), 1); + $parts = explode(')', $comment); + $comment = $this->_splitCheck($parts, ')'); + $comments[] = $comment; + + // +1 is for the trailing ) + $_mailbox = substr($_mailbox, strpos($_mailbox, $comment)+strlen($comment)+1); + } else { + break; + } + } + + foreach ($comments as $comment) { + $mailbox = str_replace("($comment)", '', $mailbox); + } + + $mailbox = trim($mailbox); + + // Check for name + route-addr + if (substr($mailbox, -1) == '>' && substr($mailbox, 0, 1) != '<') { + $parts = explode('<', $mailbox); + $name = $this->_splitCheck($parts, '<'); + + $phrase = trim($name); + $route_addr = trim(substr($mailbox, strlen($name.'<'), -1)); + + if ($this->_validatePhrase($phrase) === false || ($route_addr = $this->_validateRouteAddr($route_addr)) === false) { + return false; + } + + // Only got addr-spec + } else { + // First snip angle brackets if present. + if (substr($mailbox, 0, 1) == '<' && substr($mailbox, -1) == '>') { + $addr_spec = substr($mailbox, 1, -1); + } else { + $addr_spec = $mailbox; + } + + if (($addr_spec = $this->_validateAddrSpec($addr_spec)) === false) { + return false; + } + } + + // Construct the object that will be returned. + $mbox = new stdClass(); + + // Add the phrase (even if empty) and comments + $mbox->personal = $phrase; + $mbox->comment = isset($comments) ? $comments : array(); + + if (isset($route_addr)) { + $mbox->mailbox = $route_addr['local_part']; + $mbox->host = $route_addr['domain']; + $route_addr['adl'] !== '' ? $mbox->adl = $route_addr['adl'] : ''; + } else { + $mbox->mailbox = $addr_spec['local_part']; + $mbox->host = $addr_spec['domain']; + } + + $mailbox = $mbox; + return true; + } + + /** + * This function validates a route-addr which is: + * route-addr = "<" [route] addr-spec ">" + * + * Angle brackets have already been removed at the point of + * getting to this function. + * + * @access private + * @param string $route_addr The string to check. + * @return mixed False on failure, or an array containing validated address/route information on success. + */ + function _validateRouteAddr($route_addr) + { + // Check for colon. + if (strpos($route_addr, ':') !== false) { + $parts = explode(':', $route_addr); + $route = $this->_splitCheck($parts, ':'); + } else { + $route = $route_addr; + } + + // If $route is same as $route_addr then the colon was in + // quotes or brackets or, of course, non existent. + if ($route === $route_addr){ + unset($route); + $addr_spec = $route_addr; + if (($addr_spec = $this->_validateAddrSpec($addr_spec)) === false) { + return false; + } + } else { + // Validate route part. + if (($route = $this->_validateRoute($route)) === false) { + return false; + } + + $addr_spec = substr($route_addr, strlen($route . ':')); + + // Validate addr-spec part. + if (($addr_spec = $this->_validateAddrSpec($addr_spec)) === false) { + return false; + } + } + + if (isset($route)) { + $return['adl'] = $route; + } else { + $return['adl'] = ''; + } + + $return = array_merge($return, $addr_spec); + return $return; + } + + /** + * Function to validate a route, which is: + * route = 1#("@" domain) ":" + * + * @access private + * @param string $route The string to check. + * @return mixed False on failure, or the validated $route on success. + */ + function _validateRoute($route) + { + // Split on comma. + $domains = explode(',', trim($route)); + + foreach ($domains as $domain) { + $domain = str_replace('@', '', trim($domain)); + if (!$this->_validateDomain($domain)) return false; + } + + return $route; + } + + /** + * Function to validate a domain, though this is not quite what + * you expect of a strict internet domain. + * + * domain = sub-domain *("." sub-domain) + * + * @access private + * @param string $domain The string to check. + * @return mixed False on failure, or the validated domain on success. + */ + function _validateDomain($domain) + { + // Note the different use of $subdomains and $sub_domains + $subdomains = explode('.', $domain); + + while (count($subdomains) > 0) { + $sub_domains[] = $this->_splitCheck($subdomains, '.'); + for ($i = 0; $i < $this->index + 1; $i++) + array_shift($subdomains); + } + + foreach ($sub_domains as $sub_domain) { + if (!$this->_validateSubdomain(trim($sub_domain))) + return false; + } + + // Managed to get here, so return input. + return $domain; + } + + /** + * Function to validate a subdomain: + * subdomain = domain-ref / domain-literal + * + * @access private + * @param string $subdomain The string to check. + * @return boolean Success or failure. + */ + function _validateSubdomain($subdomain) + { + if (preg_match('|^\[(.*)]$|', $subdomain, $arr)){ + if (!$this->_validateDliteral($arr[1])) return false; + } else { + if (!$this->_validateAtom($subdomain)) return false; + } + + // Got here, so return successful. + return true; + } + + /** + * Function to validate a domain literal: + * domain-literal = "[" *(dtext / quoted-pair) "]" + * + * @access private + * @param string $dliteral The string to check. + * @return boolean Success or failure. + */ + function _validateDliteral($dliteral) + { + return !preg_match('/(.)[][\x0D\\\\]/', $dliteral, $matches) && $matches[1] != '\\'; + } + + /** + * Function to validate an addr-spec. + * + * addr-spec = local-part "@" domain + * + * @access private + * @param string $addr_spec The string to check. + * @return mixed False on failure, or the validated addr-spec on success. + */ + function _validateAddrSpec($addr_spec) + { + $addr_spec = trim($addr_spec); + + // Split on @ sign if there is one. + if (strpos($addr_spec, '@') !== false) { + $parts = explode('@', $addr_spec); + $local_part = $this->_splitCheck($parts, '@'); + $domain = substr($addr_spec, strlen($local_part . '@')); + + // No @ sign so assume the default domain. + } else { + $local_part = $addr_spec; + $domain = $this->default_domain; + } + + if (($local_part = $this->_validateLocalPart($local_part)) === false) return false; + if (($domain = $this->_validateDomain($domain)) === false) return false; + + // Got here so return successful. + return array('local_part' => $local_part, 'domain' => $domain); + } + + /** + * Function to validate the local part of an address: + * local-part = word *("." word) + * + * @access private + * @param string $local_part + * @return mixed False on failure, or the validated local part on success. + */ + function _validateLocalPart($local_part) + { + $parts = explode('.', $local_part); + $words = array(); + + // Split the local_part into words. + while (count($parts) > 0){ + $words[] = $this->_splitCheck($parts, '.'); + for ($i = 0; $i < $this->index + 1; $i++) { + array_shift($parts); + } + } + + // Validate each word. + foreach ($words as $word) { + // If this word contains an unquoted space, it is invalid. (6.2.4) + if (strpos($word, ' ') && $word[0] !== '"') + { + return false; + } + + if ($this->_validatePhrase(trim($word)) === false) return false; + } + + // Managed to get here, so return the input. + return $local_part; + } + + /** + * Returns an approximate count of how many addresses are in the + * given string. This is APPROXIMATE as it only splits based on a + * comma which has no preceding backslash. Could be useful as + * large amounts of addresses will end up producing *large* + * structures when used with parseAddressList(). + * + * @param string $data Addresses to count + * @return int Approximate count + */ + function approximateCount($data) + { + return count(preg_split('/(?@. This can be sufficient for most + * people. Optional stricter mode can be utilised which restricts + * mailbox characters allowed to alphanumeric, full stop, hyphen + * and underscore. + * + * @param string $data Address to check + * @param boolean $strict Optional stricter mode + * @return mixed False if it fails, an indexed array + * username/domain if it matches + */ + function isValidInetAddress($data, $strict = false) + { + $regex = $strict ? '/^([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})$/i' : '/^([*+!.&#$|\'\\%\/0-9a-z^_`{}=?~:-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})$/i'; + if (preg_match($regex, trim($data), $matches)) { + return array($matches[1], $matches[2]); + } else { + return false; + } + } + +} -- cgit v1.2.3-54-g00ecf