summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorBrion Vibber <brion@pobox.com>2010-03-30 17:35:27 -0700
committerBrion Vibber <brion@pobox.com>2010-03-30 17:35:27 -0700
commit0841fa712ec558d283f533690d2db50dfa1da8fc (patch)
tree406ea631c8704dfcfd81dceaacf5a1785ff2103b /lib
parentbfb2ac4910a52dde9bd3c39855f0488f56eb79bc (diff)
Ticket #1281: JID validation now more or less follows spec instead of calling e-mail validator
Basic splitting/validation code submitted via http://status.net/wiki/XMPP/JID_validation -- Copyright 2009 Patrick Georgi <patrick@georgi-clan.de> Licensed under ISC-L, which is compatible with everything else that keeps the copyright notice intact. Added PEAR Net_IDNA package to extlib to handle IDN normalization (also used by Validate's email verifier if present). * added test suite, supplemented my own test cases with JID validation and normalization test cases from libpurple * follows XMPP rules for validation of name part * fixes for normalization with non-ASCII names * will do domain checks if $config['email']['check_domain'] is on, checking for an XMPP-server SRV record or any lookup. (We don't actually need to ping those direct though.) * some more obscure stringprep validation rules aren't quite followed yet, but we err on the side of permissiveness. * we still don't actually let you save your address with a resource on it, as we strip resources when looking up users who've sent us presence or message updates. I would recommend saving the outgoing resource as a separate field if/when we add that..?
Diffstat (limited to 'lib')
-rw-r--r--lib/jabber.php179
-rw-r--r--lib/util.php49
2 files changed, 218 insertions, 10 deletions
diff --git a/lib/jabber.php b/lib/jabber.php
index db4e2e9a7..cdcfc4423 100644
--- a/lib/jabber.php
+++ b/lib/jabber.php
@@ -34,39 +34,198 @@ if (!defined('STATUSNET') && !defined('LACONICA')) {
require_once 'XMPPHP/XMPP.php';
/**
- * checks whether a string is a syntactically valid Jabber ID (JID)
+ * Splits a Jabber ID (JID) into node, domain, and resource portions.
+ *
+ * Based on validation routine submitted by:
+ * @copyright 2009 Patrick Georgi <patrick@georgi-clan.de>
+ * @license Licensed under ISC-L, which is compatible with everything else that keeps the copyright notice intact.
*
* @param string $jid string to check
*
+ * @return array with "node", "domain", and "resource" indices
+ * @throws Exception if input is not valid
+ */
+
+function jabber_split_jid($jid)
+{
+ $chars = '';
+ /* the following definitions come from stringprep, Appendix C,
+ which is used in its entirety by nodeprop, Chapter 5, "Prohibited Output" */
+ /* C1.1 ASCII space characters */
+ $chars .= "\x{20}";
+ /* C1.2 Non-ASCII space characters */
+ $chars .= "\x{a0}\x{1680}\x{2000}-\x{200b}\x{202f}\x{205f}\x{3000a}";
+ /* C2.1 ASCII control characters */
+ $chars .= "\x{00}-\x{1f}\x{7f}";
+ /* C2.2 Non-ASCII control characters */
+ $chars .= "\x{80}-\x{9f}\x{6dd}\x{70f}\x{180e}\x{200c}\x{200d}\x{2028}\x{2029}\x{2060}-\x{2063}\x{206a}-\x{206f}\x{feff}\x{fff9}-\x{fffc}\x{1d173}-\x{1d17a}";
+ /* C3 - Private Use */
+ $chars .= "\x{e000}-\x{f8ff}\x{f0000}-\x{ffffd}\x{100000}-\x{10fffd}";
+ /* C4 - Non-character code points */
+ $chars .= "\x{fdd0}-\x{fdef}\x{fffe}\x{ffff}\x{1fffe}\x{1ffff}\x{2fffe}\x{2ffff}\x{3fffe}\x{3ffff}\x{4fffe}\x{4ffff}\x{5fffe}\x{5ffff}\x{6fffe}\x{6ffff}\x{7fffe}\x{7ffff}\x{8fffe}\x{8ffff}\x{9fffe}\x{9ffff}\x{afffe}\x{affff}\x{bfffe}\x{bffff}\x{cfffe}\x{cffff}\x{dfffe}\x{dffff}\x{efffe}\x{effff}\x{ffffe}\x{fffff}\x{10fffe}\x{10ffff}";
+ /* C5 - Surrogate codes */
+ $chars .= "\x{d800}-\x{dfff}";
+ /* C6 - Inappropriate for plain text */
+ $chars .= "\x{fff9}-\x{fffd}";
+ /* C7 - Inappropriate for canonical representation */
+ $chars .= "\x{2ff0}-\x{2ffb}";
+ /* C8 - Change display properties or are deprecated */
+ $chars .= "\x{340}\x{341}\x{200e}\x{200f}\x{202a}-\x{202e}\x{206a}-\x{206f}";
+ /* C9 - Tagging characters */
+ $chars .= "\x{e0001}\x{e0020}-\x{e007f}";
+
+ /* Nodeprep forbids some more characters */
+ $nodeprepchars = $chars;
+ $nodeprepchars .= "\x{22}\x{26}\x{27}\x{2f}\x{3a}\x{3c}\x{3e}\x{40}";
+
+ $parts = explode("/", $jid, 2);
+ if (count($parts) > 1) {
+ $resource = $parts[1];
+ if ($resource == '') {
+ // Warning: empty resource isn't legit.
+ // But if we're normalizing, we may as well take it...
+ }
+ } else {
+ $resource = null;
+ }
+
+ $node = explode("@", $parts[0]);
+ if ((count($node) > 2) || (count($node) == 0)) {
+ throw new Exception("Invalid JID: too many @s");
+ } else if (count($node) == 1) {
+ $domain = $node[0];
+ $node = null;
+ } else {
+ $domain = $node[1];
+ $node = $node[0];
+ if ($node == '') {
+ throw new Exception("Invalid JID: @ but no node");
+ }
+ }
+
+ // Length limits per http://xmpp.org/rfcs/rfc3920.html#addressing
+ if ($node !== null) {
+ if (strlen($node) > 1023) {
+ throw new Exception("Invalid JID: node too long.");
+ }
+ if (preg_match("/[".$nodeprepchars."]/u", $node)) {
+ throw new Exception("Invalid JID node '$node'");
+ }
+ }
+
+ if (strlen($domain) > 1023) {
+ throw new Exception("Invalid JID: domain too long.");
+ }
+ if (!common_valid_domain($domain)) {
+ throw new Exception("Invalid JID domain name '$domain'");
+ }
+
+ if ($resource !== null) {
+ if (strlen($resource) > 1023) {
+ throw new Exception("Invalid JID: resource too long.");
+ }
+ if (preg_match("/[".$chars."]/u", $resource)) {
+ throw new Exception("Invalid JID resource '$resource'");
+ }
+ }
+
+ return array('node' => is_null($node) ? null : mb_strtolower($node),
+ 'domain' => is_null($domain) ? null : mb_strtolower($domain),
+ 'resource' => $resource);
+}
+
+/**
+ * Checks whether a string is a syntactically valid Jabber ID (JID),
+ * either with or without a resource.
+ *
+ * Note that a bare domain can be a valid JID.
+ *
+ * @param string $jid string to check
+ * @param bool $check_domain whether we should validate that domain...
+ *
* @return boolean whether the string is a valid JID
*/
+function jabber_valid_full_jid($jid, $check_domain=false)
+{
+ try {
+ $parts = jabber_split_jid($jid);
+ if ($check_domain) {
+ if (!jabber_check_domain($parts['domain'])) {
+ return false;
+ }
+ }
+ return $parts['resource'] !== ''; // missing or present; empty ain't kosher
+ } catch (Exception $e) {
+ return false;
+ }
+}
-function jabber_valid_base_jid($jid)
+/**
+ * Checks whether a string is a syntactically valid base Jabber ID (JID).
+ * A base JID won't include a resource specifier on the end; since we
+ * take it off when reading input we can't really use them reliably
+ * to direct outgoing messages yet (sorry guys!)
+ *
+ * Note that a bare domain can be a valid JID.
+ *
+ * @param string $jid string to check
+ * @param bool $check_domain whether we should validate that domain...
+ *
+ * @return boolean whether the string is a valid JID
+ */
+function jabber_valid_base_jid($jid, $check_domain=false)
{
- // Cheap but effective
- return Validate::email($jid);
+ try {
+ $parts = jabber_split_jid($jid);
+ if ($check_domain) {
+ if (!jabber_check_domain($parts['domain'])) {
+ return false;
+ }
+ }
+ return ($parts['resource'] === null); // missing; empty ain't kosher
+ } catch (Exception $e) {
+ return false;
+ }
}
/**
- * normalizes a Jabber ID for comparison
+ * Normalizes a Jabber ID for comparison, dropping the resource component if any.
*
* @param string $jid JID to check
+ * @param bool $check_domain if true, reject if the domain isn't findable
*
* @return string an equivalent JID in normalized (lowercase) form
*/
function jabber_normalize_jid($jid)
{
- if (preg_match("/(?:([^\@]+)\@)?([^\/]+)(?:\/(.*))?$/", $jid, $matches)) {
- $node = $matches[1];
- $server = $matches[2];
- return strtolower($node.'@'.$server);
- } else {
+ try {
+ $parts = jabber_split_jid($jid);
+ if ($parts['node'] !== null) {
+ return $parts['node'] . '@' . $parts['domain'];
+ } else {
+ return $parts['domain'];
+ }
+ } catch (Exception $e) {
return null;
}
}
/**
+ * Check if this domain's got some legit DNS record
+ */
+function jabber_check_domain($domain)
+{
+ if (checkdnsrr("_xmpp-server._tcp." . $domain, "SRV")) {
+ return true;
+ }
+ if (checkdnsrr($domain, "ANY")) {
+ return true;
+ }
+ return false;
+}
+
+/**
* the JID of the Jabber daemon for this StatusNet instance
*
* @return string JID of the Jabber daemon
diff --git a/lib/util.php b/lib/util.php
index 795997868..f4ee26bbf 100644
--- a/lib/util.php
+++ b/lib/util.php
@@ -1397,6 +1397,55 @@ function common_valid_tag($tag)
return false;
}
+/**
+ * Determine if given domain or address literal is valid
+ * eg for use in JIDs and URLs. Does not check if the domain
+ * exists!
+ *
+ * @param string $domain
+ * @return boolean valid or not
+ */
+function common_valid_domain($domain)
+{
+ $octet = "(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])";
+ $ipv4 = "(?:$octet(?:\.$octet){3})";
+ if (preg_match("/^$ipv4$/u", $domain)) return true;
+
+ $group = "(?:[0-9a-f]{1,4})";
+ $ipv6 = "(?:\[($group(?::$group){0,7})?(::)?($group(?::$group){0,7})?\])"; // http://tools.ietf.org/html/rfc3513#section-2.2
+
+ if (preg_match("/^$ipv6$/ui", $domain, $matches)) {
+ $before = explode(":", $matches[1]);
+ $zeroes = $matches[2];
+ $after = explode(":", $matches[3]);
+ if ($zeroes) {
+ $min = 0;
+ $max = 7;
+ } else {
+ $min = 1;
+ $max = 8;
+ }
+ $explicit = count($before) + count($after);
+ if ($explicit < $min || $explicit > $max) {
+ return false;
+ }
+ return true;
+ }
+
+ try {
+ require_once "Net/IDNA.php";
+ $idn = Net_IDNA::getInstance();
+ $domain = $idn->encode($domain);
+ } catch (Exception $e) {
+ return false;
+ }
+
+ $subdomain = "(?:[a-z0-9][a-z0-9-]*)"; // @fixme
+ $fqdn = "(?:$subdomain(?:\.$subdomain)*\.?)";
+
+ return preg_match("/^$fqdn$/ui", $domain);
+}
+
/* Following functions are copied from MediaWiki GlobalFunctions.php
* and written by Evan Prodromou. */