From 0841fa712ec558d283f533690d2db50dfa1da8fc Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 30 Mar 2010 17:35:27 -0700 Subject: Ticket #1281: JID validation now more or less follows spec instead of calling e-mail validator Basic splitting/validation code submitted via http://status.net/wiki/XMPP/JID_validation -- Copyright 2009 Patrick Georgi Licensed under ISC-L, which is compatible with everything else that keeps the copyright notice intact. Added PEAR Net_IDNA package to extlib to handle IDN normalization (also used by Validate's email verifier if present). * added test suite, supplemented my own test cases with JID validation and normalization test cases from libpurple * follows XMPP rules for validation of name part * fixes for normalization with non-ASCII names * will do domain checks if $config['email']['check_domain'] is on, checking for an XMPP-server SRV record or any lookup. (We don't actually need to ping those direct though.) * some more obscure stringprep validation rules aren't quite followed yet, but we err on the side of permissiveness. * we still don't actually let you save your address with a resource on it, as we strip resources when looking up users who've sent us presence or message updates. I would recommend saving the outgoing resource as a separate field if/when we add that..? --- lib/jabber.php | 179 +++++++++++++++++++++++++++++++++++++++++++++++++++++---- lib/util.php | 49 ++++++++++++++++ 2 files changed, 218 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/jabber.php b/lib/jabber.php index db4e2e9a7..cdcfc4423 100644 --- a/lib/jabber.php +++ b/lib/jabber.php @@ -34,38 +34,197 @@ if (!defined('STATUSNET') && !defined('LACONICA')) { require_once 'XMPPHP/XMPP.php'; /** - * checks whether a string is a syntactically valid Jabber ID (JID) + * Splits a Jabber ID (JID) into node, domain, and resource portions. 
/**
 * Splits a Jabber ID (JID) into node, domain, and resource portions.
 *
 * Based on validation routine submitted by:
 * @copyright 2009 Patrick Georgi
 * @license Licensed under ISC-L, which is compatible with everything else that keeps the copyright notice intact.
 *
 * The node and domain are returned lowercased; the resource is returned
 * as given. A trailing "/" with nothing after it yields an empty-string
 * resource rather than an error, so that callers which only normalize
 * can still drop it; jabber_valid_full_jid() rejects that case.
 *
 * @param string $jid string to check
 *
 * @return array with "node", "domain", and "resource" indices
 *               (node and resource are null when absent)
 * @throws Exception if input is not valid
 */
function jabber_split_jid($jid)
{
    if (!is_string($jid)) {
        throw new Exception("Invalid JID: not a string");
    }

    /* The following definitions come from stringprep, Appendix C.
       They are written as PCRE \x{...} escapes (single-quoted, so PHP
       leaves them alone) for use inside a character class with /u. */
    $chars = '';
    /* C1.2 Non-ASCII space characters */
    $chars .= '\x{a0}\x{1680}\x{2000}-\x{200b}\x{202f}\x{205f}\x{3000}';
    /* C2.1 ASCII control characters */
    $chars .= '\x{00}-\x{1f}\x{7f}';
    /* C2.2 Non-ASCII control characters */
    $chars .= '\x{80}-\x{9f}\x{6dd}\x{70f}\x{180e}\x{200c}\x{200d}\x{2028}\x{2029}\x{2060}-\x{2063}\x{206a}-\x{206f}\x{feff}\x{fff9}-\x{fffc}\x{1d173}-\x{1d17a}';
    /* C3 - Private Use */
    $chars .= '\x{e000}-\x{f8ff}\x{f0000}-\x{ffffd}\x{100000}-\x{10fffd}';
    /* C4 - Non-character code points */
    $chars .= '\x{fdd0}-\x{fdef}\x{fffe}\x{ffff}\x{1fffe}\x{1ffff}\x{2fffe}\x{2ffff}\x{3fffe}\x{3ffff}\x{4fffe}\x{4ffff}\x{5fffe}\x{5ffff}\x{6fffe}\x{6ffff}\x{7fffe}\x{7ffff}\x{8fffe}\x{8ffff}\x{9fffe}\x{9ffff}\x{afffe}\x{affff}\x{bfffe}\x{bffff}\x{cfffe}\x{cffff}\x{dfffe}\x{dffff}\x{efffe}\x{effff}\x{ffffe}\x{fffff}\x{10fffe}\x{10ffff}';
    /* C5 - Surrogate codes (U+D800-U+DFFF) are deliberately NOT listed:
       PCRE refuses surrogate code points in a /u pattern, which would make
       every preg_match() below fail. They cannot occur in well-formed
       UTF-8 anyway, and malformed input is rejected below because
       preg_match() returns false for it. */
    /* C6 - Inappropriate for plain text */
    $chars .= '\x{fff9}-\x{fffd}';
    /* C7 - Inappropriate for canonical representation */
    $chars .= '\x{2ff0}-\x{2ffb}';
    /* C8 - Change display properties or are deprecated */
    $chars .= '\x{340}\x{341}\x{200e}\x{200f}\x{202a}-\x{202e}\x{206a}-\x{206f}';
    /* C9 - Tagging characters */
    $chars .= '\x{e0001}\x{e0020}-\x{e007f}';

    /* Resourceprep prohibits the tables above but not C1.1, so an ASCII
       space is allowed inside a resource. */
    $resourceprepchars = $chars;

    /* Nodeprep additionally prohibits C1.1 (ASCII space) and the
       characters " & ' / : < > @ */
    $nodeprepchars = '\x{20}' . $chars;
    $nodeprepchars .= '\x{22}\x{26}\x{27}\x{2f}\x{3a}\x{3c}\x{3e}\x{40}';

    // Everything after the first "/" is the resource.
    $parts = explode("/", $jid, 2);
    $resource = (count($parts) > 1) ? $parts[1] : null;

    // explode() always returns at least one element, so only the
    // "too many" case needs checking.
    $pieces = explode("@", $parts[0]);
    if (count($pieces) > 2) {
        throw new Exception("Invalid JID: too many @s");
    }
    if (count($pieces) == 1) {
        // A bare domain is a valid JID.
        $node = null;
        $domain = $pieces[0];
    } else {
        $node = $pieces[0];
        $domain = $pieces[1];
        if ($node === '') {
            throw new Exception("Invalid JID: @ but no node");
        }
    }

    // Length limits per http://xmpp.org/rfcs/rfc3920.html#addressing
    // (the limits are in bytes, hence strlen() rather than mb_strlen()).
    if ($node !== null) {
        if (strlen($node) > 1023) {
            throw new Exception("Invalid JID: node too long.");
        }
        // !== 0 also catches false (malformed UTF-8 or a PCRE error), so a
        // failed check can never be mistaken for "no forbidden characters".
        if (preg_match("/[" . $nodeprepchars . "]/u", $node) !== 0) {
            throw new Exception("Invalid JID node '$node'");
        }
    }

    if (strlen($domain) > 1023) {
        throw new Exception("Invalid JID: domain too long.");
    }
    if (!common_valid_domain($domain)) {
        throw new Exception("Invalid JID domain name '$domain'");
    }

    if ($resource !== null) {
        if (strlen($resource) > 1023) {
            throw new Exception("Invalid JID: resource too long.");
        }
        if (preg_match("/[" . $resourceprepchars . "]/u", $resource) !== 0) {
            throw new Exception("Invalid JID resource '$resource'");
        }
    }

    return array('node' => is_null($node) ? null : mb_strtolower($node, 'UTF-8'),
                 'domain' => mb_strtolower($domain, 'UTF-8'),
                 'resource' => $resource);
}

/**
 * Checks whether a string is a syntactically valid Jabber ID (JID),
 * either with or without a resource.
 *
 * Note that a bare domain can be a valid JID.
 *
 * @param string $jid          string to check
 * @param bool   $check_domain whether to also require that the domain
 *                             passes jabber_check_domain()
 *
 * @return boolean whether the string is a valid JID
 */
function jabber_valid_full_jid($jid, $check_domain=false)
{
    try {
        $parts = jabber_split_jid($jid);
    } catch (Exception $e) {
        return false;
    }
    if ($check_domain && !jabber_check_domain($parts['domain'])) {
        return false;
    }
    // Resource may be missing (null) or present; empty ain't kosher.
    return $parts['resource'] !== '';
}

/**
 * Checks whether a string is a syntactically valid base Jabber ID (JID).
 * A base JID won't include a resource specifier on the end; since we
 * take it off when reading input we can't really use them reliably
 * to direct outgoing messages yet (sorry guys!)
 *
 * Note that a bare domain can be a valid JID.
 *
 * @param string $jid          string to check
 * @param bool   $check_domain whether to also require that the domain
 *                             passes jabber_check_domain()
 *
 * @return boolean whether the string is a valid JID
 */
function jabber_valid_base_jid($jid, $check_domain=false)
{
    try {
        $parts = jabber_split_jid($jid);
    } catch (Exception $e) {
        return false;
    }
    if ($check_domain && !jabber_check_domain($parts['domain'])) {
        return false;
    }
    // Resource must be missing entirely; empty ain't kosher either.
    return ($parts['resource'] === null);
}

/**
 * Normalizes a Jabber ID for comparison, dropping the resource component if any.
 *
 * @param string $jid          JID to check
 * @param bool   $check_domain if true, reject if the domain isn't findable
 *
 * @return string an equivalent JID in normalized (lowercase) form,
 *                or null if the JID is invalid or rejected
 */
function jabber_normalize_jid($jid, $check_domain=false)
{
    try {
        $parts = jabber_split_jid($jid);
    } catch (Exception $e) {
        return null;
    }
    if ($check_domain && !jabber_check_domain($parts['domain'])) {
        return null;
    }
    if ($parts['node'] === null) {
        return $parts['domain'];
    }
    return $parts['node'] . '@' . $parts['domain'];
}

/**
 * Check if this domain's got some legit DNS record.
 *
 * Looks for an XMPP server SRV record first, then falls back to asking
 * for any record at all for the bare domain.
 *
 * @param string $domain domain name to look up
 *
 * @return boolean true if either lookup succeeds
 */
function jabber_check_domain($domain)
{
    if (checkdnsrr("_xmpp-server._tcp." . $domain, "SRV")) {
        return true;
    }
    if (checkdnsrr($domain, "ANY")) {
        return true;
    }
    return false;
}

// ---- lib/util.php ----

/**
 * Determine if given domain or address literal is valid
 * eg for use in JIDs and URLs. Does not check if the domain
 * exists!
 *
 * Accepts a dotted-quad IPv4 address, a bracketed IPv6 literal
 * (hex groups only, with at most one "::"), or a host name that
 * Net_IDNA can encode into dot-separated ASCII labels.
 *
 * @param string $domain
 * @return boolean valid or not
 */
function common_valid_domain($domain)
{
    // The D modifier stops "$" from also matching before a trailing newline.
    $octet = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])';
    $ipv4 = '(?:' . $octet . '(?:\.' . $octet . '){3})';
    if (preg_match('/^' . $ipv4 . '$/uD', $domain)) {
        return true;
    }

    // http://tools.ietf.org/html/rfc3513#section-2.2
    $group = '(?:[0-9a-f]{1,4})';
    $run = $group . '(?::' . $group . '){0,7}';
    $ipv6 = '(?:\[(' . $run . ')?(::)?(' . $run . ')?\])';

    if (preg_match('/^' . $ipv6 . '$/uiD', $domain, $matches)) {
        // preg_match() omits trailing groups that did not participate,
        // so every capture has to be read defensively.
        $before = isset($matches[1]) ? $matches[1] : '';
        $zeroes = isset($matches[2]) && $matches[2] !== '';
        $after = isset($matches[3]) ? $matches[3] : '';

        // Count the hex groups actually written out; an empty run has none
        // (explode() on '' would wrongly report one).
        $explicit = 0;
        foreach (array($before, $after) as $written) {
            if ($written !== '') {
                $explicit += substr_count($written, ':') + 1;
            }
        }

        if ($zeroes) {
            // "::" stands in for at least one group of zeroes.
            return $explicit <= 7;
        }
        // Without "::" all eight groups must be present in a single run;
        // a non-empty second run here means digits were split off without
        // a separating colon.
        return $after === '' && $explicit === 8;
    }

    try {
        require_once "Net/IDNA.php";
        $idn = Net_IDNA::getInstance();
        $domain = $idn->encode($domain);
    } catch (Exception $e) {
        return false;
    }

    $subdomain = '(?:[a-z0-9][a-z0-9-]*)'; // @fixme
    $fqdn = '(?:' . $subdomain . '(?:\.' . $subdomain . ')*\.?)';

    return preg_match('/^' . $fqdn . '$/uiD', $domain) === 1;
}

/* Following functions are copied from MediaWiki GlobalFunctions.php
 * and written by Evan Prodromou. */