From 79ffebb51b1141791d5ee7478e3a7beaa9fe8faa Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 2 Mar 2010 16:30:09 -0800 Subject: OStatus: save file records for enclosures Also stripping id from foreign HTML messages (could interfere with UI) and disabled failing attachment popup for a.attachment links that don't have a proper id, so you can click through instead of getting an error. Issues: * any other links aren't marked and saved * inconsistent behavior between local and remote attachments (local displays in lightbox, remote doesn't) * if the enclosure'd object isn't referenced in the content, you won't be offered a link to it in our UI --- classes/File.php | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'classes/File.php') diff --git a/classes/File.php b/classes/File.php index 189e04ce0..1b8ef1b3e 100644 --- a/classes/File.php +++ b/classes/File.php @@ -286,5 +286,12 @@ class File extends Memcached_DataObject } return $enclosure; } + + // quick back-compat hack, since there's still code using this + function isEnclosure() + { + $enclosure = $this->getEnclosure(); + return !empty($enclosure); + } } -- cgit v1.2.3-54-g00ecf From f02cb7c71800e6a4426b92ce04c4b8f89006b10a Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Wed, 10 Mar 2010 13:39:42 -0800 Subject: Fix for attachment "h bug": posting a shortened link to an oembed-able resource that has been previously used in the system would incorrectly save "h" as the item's type and title. --- classes/File.php | 9 +++++++- classes/File_oembed.php | 8 ++++++- classes/File_redirection.php | 51 ++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 62 insertions(+), 6 deletions(-) (limited to 'classes/File.php') diff --git a/classes/File.php b/classes/File.php index 1b8ef1b3e..a83ecac4c 100644 --- a/classes/File.php +++ b/classes/File.php @@ -67,7 +67,14 @@ class File extends Memcached_DataObject return $att; } - function saveNew($redir_data, $given_url) { + /** + * Save a new file record. + * + * @param array $redir_data lookup data eg from File_redirection::where() + * @param string $given_url + * @return File + */ + function saveNew(array $redir_data, $given_url) { $x = new File; $x->url = $given_url; if (!empty($redir_data['protected'])) $x->protected = $redir_data['protected']; diff --git a/classes/File_oembed.php b/classes/File_oembed.php index 11f160718..f59eaf24c 100644 --- a/classes/File_oembed.php +++ b/classes/File_oembed.php @@ -81,7 +81,13 @@ class File_oembed extends Memcached_DataObject } } - function saveNew($data, $file_id) { + /** + * Save embedding info for a new file. + * + * @param array $data lookup data as from File_redirection::where + * @param int $file_id + */ + function saveNew(array $data, $file_id) { $file_oembed = new File_oembed; $file_oembed->file_id = $file_id; $file_oembed->version = $data->version; diff --git a/classes/File_redirection.php b/classes/File_redirection.php index 08a6e8d8b..d96979158 100644 --- a/classes/File_redirection.php +++ b/classes/File_redirection.php @@ -115,11 +115,45 @@ class File_redirection extends Memcached_DataObject return $ret; } + /** + * Check if this URL is a redirect and return redir info. + * If a File record is present for this URL, it is not considered a redirect. + * If a File_redirection record is present for this URL, the recorded target is returned. + * + * If no File or File_redirect record is present, the URL is hit and any + * redirects are followed, up to 10 levels or until a protected URL is + * reached. + * + * @param string $in_url + * @return mixed one of: + * string - target URL, if this is a direct link or a known redirect + * array - redirect info if this is an *unknown* redirect: + * associative array with the following elements: + * code: HTTP status code + * redirects: count of redirects followed + * url: URL string of final target + * type (optional): MIME type from Content-Type header + * size (optional): byte size from Content-Length header + * time (optional): timestamp from Last-Modified header + */ function where($in_url) { $ret = File_redirection::_redirectWhere_imp($in_url); return $ret; } + /** + * Shorten a URL with the current user's configured shortening + * options, if applicable. + * + * If it cannot be shortened or the "short" URL is longer than the + * original, the original is returned. + * + * If the referenced item has not been seen before, embedding data + * may be saved. + * + * @param string $long_url + * @return string + */ function makeShort($long_url) { $canon = File_redirection::_canonUrl($long_url); @@ -141,11 +175,20 @@ class File_redirection extends Memcached_DataObject // store it $file = File::staticGet('url', $long_url); if (empty($file)) { + // Check if the target URL is itself a redirect... $redir_data = File_redirection::where($long_url); - $file = File::saveNew($redir_data, $long_url); - $file_id = $file->id; - if (!empty($redir_data['oembed']['json'])) { - File_oembed::saveNew($redir_data['oembed']['json'], $file_id); + if (is_array($redir_data)) { + // We haven't seen the target URL before. + // Save file and embedding data about it! + $file = File::saveNew($redir_data, $long_url); + $file_id = $file->id; + if (!empty($redir_data['oembed']['json'])) { + File_oembed::saveNew($redir_data['oembed']['json'], $file_id); + } + } else if (is_string($redir_data)) { + // The file is a known redirect target. + $file = File::staticGet('url', $redir_data); + $file_id = $file->id; } } else { $file_id = $file->id; -- cgit v1.2.3-54-g00ecf From 294b290dd95a2e4a09026932a2b066ccee587681 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Wed, 10 Mar 2010 14:31:29 -0800 Subject: Fixup script for files w/ bogus data saved into file record ('h bug') --- classes/File.php | 24 +++++++++++++++--- classes/File_oembed.php | 4 +-- classes/File_redirection.php | 60 ++++++++++++++++++++++++++++++-------------- 3 files changed, 64 insertions(+), 24 deletions(-) (limited to 'classes/File.php') diff --git a/classes/File.php b/classes/File.php index a83ecac4c..ba8332841 100644 --- a/classes/File.php +++ b/classes/File.php @@ -84,19 +84,36 @@ class File extends Memcached_DataObject if (isset($redir_data['time']) && $redir_data['time'] > 0) $x->date = intval($redir_data['time']); $file_id = $x->insert(); + $x->saveOembed($redir_data, $given_url); + return $x; + } + + /** + * Save embedding information for this file, if applicable. + * + * Normally this won't need to be called manually, as File::saveNew() + * takes care of it. + * + * @param array $redir_data lookup data eg from File_redirection::where() + * @param string $given_url + * @return boolean success + */ + public function saveOembed($redir_data, $given_url) + { if (isset($redir_data['type']) && (('text/html' === substr($redir_data['type'], 0, 9) || 'application/xhtml+xml' === substr($redir_data['type'], 0, 21))) && ($oembed_data = File_oembed::_getOembed($given_url))) { - $fo = File_oembed::staticGet('file_id', $file_id); + $fo = File_oembed::staticGet('file_id', $this->id); if (empty($fo)) { - File_oembed::saveNew($oembed_data, $file_id); + File_oembed::saveNew($oembed_data, $this->id); + return true; } else { common_log(LOG_WARNING, "Strangely, a File_oembed object exists for new file $file_id", __FILE__); } } - return $x; + return false; } function processNew($given_url, $notice_id=null) { @@ -112,6 +129,7 @@ class File extends Memcached_DataObject $redir_url = $redir_data['url']; } elseif (is_string($redir_data)) { $redir_url = $redir_data; + $redir_data = array(); } else { throw new ServerException("Can't process url '$given_url'"); } diff --git a/classes/File_oembed.php b/classes/File_oembed.php index f59eaf24c..041b44740 100644 --- a/classes/File_oembed.php +++ b/classes/File_oembed.php @@ -84,10 +84,10 @@ class File_oembed extends Memcached_DataObject /** * Save embedding info for a new file. * - * @param array $data lookup data as from File_redirection::where + * @param object $data Services_oEmbed_Object_* * @param int $file_id */ - function saveNew(array $data, $file_id) { + function saveNew($data, $file_id) { $file_oembed = new File_oembed; $file_oembed->file_id = $file_id; $file_oembed->version = $data->version; diff --git a/classes/File_redirection.php b/classes/File_redirection.php index d96979158..f128b3e07 100644 --- a/classes/File_redirection.php +++ b/classes/File_redirection.php @@ -58,24 +58,30 @@ class File_redirection extends Memcached_DataObject return $request; } - function _redirectWhere_imp($short_url, $redirs = 10, $protected = false) { + /** + * Check if this URL is a redirect and return redir info. + * + * Most code should call File_redirection::where instead, to check if we + * already know that redirection and avoid extra hits to the web. + * + * The URL is hit and any redirects are followed, up to 10 levels or until + * a protected URL is reached. + * + * @param string $in_url + * @return mixed one of: + * string - target URL, if this is a direct link or can't be followed + * array - redirect info if this is an *unknown* redirect: + * associative array with the following elements: + * code: HTTP status code + * redirects: count of redirects followed + * url: URL string of final target + * type (optional): MIME type from Content-Type header + * size (optional): byte size from Content-Length header + * time (optional): timestamp from Last-Modified header + */ + public function lookupWhere($short_url, $redirs = 10, $protected = false) { if ($redirs < 0) return false; - // let's see if we know this... - $a = File::staticGet('url', $short_url); - - if (!empty($a)) { - // this is a direct link to $a->url - return $a->url; - } else { - $b = File_redirection::staticGet('url', $short_url); - if (!empty($b)) { - // this is a redirect to $b->file_id - $a = File::staticGet('id', $b->file_id); - return $a->url; - } - } - if(strpos($short_url,'://') === false){ return $short_url; } @@ -93,12 +99,13 @@ class File_redirection extends Memcached_DataObject } } catch (Exception $e) { // Invalid URL or failure to reach server + common_log(LOG_ERR, "Error while following redirects for $short_url: " . $e->getMessage()); return $short_url; } if ($response->getRedirectCount() && File::isProtected($response->getUrl())) { // Bump back up the redirect chain until we find a non-protected URL - return self::_redirectWhere_imp($short_url, $response->getRedirectCount() - 1, true); + return self::lookupWhere($short_url, $response->getRedirectCount() - 1, true); } $ret = array('code' => $response->getStatus() @@ -136,8 +143,23 @@ class File_redirection extends Memcached_DataObject * size (optional): byte size from Content-Length header * time (optional): timestamp from Last-Modified header */ - function where($in_url) { - $ret = File_redirection::_redirectWhere_imp($in_url); + public function where($in_url) { + // let's see if we know this... + $a = File::staticGet('url', $in_url); + + if (!empty($a)) { + // this is a direct link to $a->url + return $a->url; + } else { + $b = File_redirection::staticGet('url', $in_url); + if (!empty($b)) { + // this is a redirect to $b->file_id + $a = File::staticGet('id', $b->file_id); + return $a->url; + } + } + + $ret = File_redirection::lookupWhere($in_url); return $ret; } -- cgit v1.2.3-54-g00ecf From 74fd75555669cfe0a53b6cbc50a425e6f9f093d1 Mon Sep 17 00:00:00 2001 From: Craig Andrews Date: Thu, 11 Mar 2010 17:26:59 -0500 Subject: A null mimetype is not an enclosure (more likely than not means there was an error) --- classes/File.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'classes/File.php') diff --git a/classes/File.php b/classes/File.php index 8c788c007..33273bbdc 100644 --- a/classes/File.php +++ b/classes/File.php @@ -285,7 +285,7 @@ class File extends Memcached_DataObject $enclosure->mimetype=$this->mimetype; if(! isset($this->filename)){ - $notEnclosureMimeTypes = array('text/html','application/xhtml+xml'); + $notEnclosureMimeTypes = array(null,'text/html','application/xhtml+xml'); $mimetype = strtolower($this->mimetype); $semicolon = strpos($mimetype,';'); if($semicolon){ -- cgit v1.2.3-54-g00ecf From 92ded7c6cb56056a89bc8b3caabd08049104898e Mon Sep 17 00:00:00 2001 From: Nick Holliday Date: Wed, 24 Mar 2010 23:30:27 +0000 Subject: Fixes problem with IRC URLs showing as attachments --- classes/File.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'classes/File.php') diff --git a/classes/File.php b/classes/File.php index 33273bbdc..c9477f5f1 100644 --- a/classes/File.php +++ b/classes/File.php @@ -286,7 +286,9 @@ class File extends Memcached_DataObject if(! isset($this->filename)){ $notEnclosureMimeTypes = array(null,'text/html','application/xhtml+xml'); - $mimetype = strtolower($this->mimetype); + if($mimetype != null){ + $mimetype = strtolower($this->mimetype); + } $semicolon = strpos($mimetype,';'); if($semicolon){ $mimetype = substr($mimetype,0,$semicolon); -- cgit v1.2.3-54-g00ecf From 95159112b2331ee832a4cf1e711cb8f1f0193c44 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 25 May 2010 13:09:21 -0700 Subject: Hotpatch for infinite redirection-following loop seen processing URLs to http://clojure.org/ -- if we end up with an unstable redirect target (final item in a redirect chain ends up redirecting us somewhere else when we visit it again), just save the last version we saw instead of trying to start over. Pretty much everything in File and File_redirection initial processing needs to be rewritten to be non-awful; this code is very hard to follow and very easy to make huge bugs. A fair amount of the complication is probably obsoleted by the redirection following being built into HTTPClient now. --- classes/File.php | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'classes/File.php') diff --git a/classes/File.php b/classes/File.php index 33273bbdc..8297e5091 100644 --- a/classes/File.php +++ b/classes/File.php @@ -116,7 +116,11 @@ class File extends Memcached_DataObject return false; } - function processNew($given_url, $notice_id=null) { + /** + * @fixme refactor this mess, it's gotten pretty scary. + * @param bool $followRedirects + */ + function processNew($given_url, $notice_id=null, $followRedirects=true) { if (empty($given_url)) return -1; // error, no url to process $given_url = File_redirection::_canonUrl($given_url); if (empty($given_url)) return -1; // error, no url to process @@ -124,6 +128,10 @@ class File extends Memcached_DataObject if (empty($file)) { $file_redir = File_redirection::staticGet('url', $given_url); if (empty($file_redir)) { + // @fixme for new URLs this also looks up non-redirect data + // such as target content type, size, etc, which we need + // for File::saveNew(); so we call it even if not following + // new redirects. $redir_data = File_redirection::where($given_url); if (is_array($redir_data)) { $redir_url = $redir_data['url']; @@ -134,11 +142,19 @@ class File extends Memcached_DataObject throw new ServerException("Can't process url '$given_url'"); } // TODO: max field length - if ($redir_url === $given_url || strlen($redir_url) > 255) { + if ($redir_url === $given_url || strlen($redir_url) > 255 || !$followRedirects) { $x = File::saveNew($redir_data, $given_url); $file_id = $x->id; } else { - $x = File::processNew($redir_url, $notice_id); + // This seems kind of messed up... for now skipping this part + // if we're already under a redirect, so we don't go into + // horrible infinite loops if we've been given an unstable + // redirect (where the final destination of the first request + // doesn't match what we get when we ask for it again). + // + // Seen in the wild with clojure.org, which redirects through + // wikispaces for auth and appends session data in the URL params. + $x = File::processNew($redir_url, $notice_id, /*followRedirects*/false); $file_id = $x->id; File_redirection::saveNew($redir_data, $file_id, $given_url); } -- cgit v1.2.3-54-g00ecf From dcfe5b24f6047aa830f107628aa3c10b9d292951 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Mon, 28 Jun 2010 15:20:50 -0400 Subject: Fix regression in 92ded7c6cb56056a89bc8b3caabd08049104898e: spewed PHP notices when checking for enclosures due to uninitialized variables. --- classes/File.php | 1 + 1 file changed, 1 insertion(+) (limited to 'classes/File.php') diff --git a/classes/File.php b/classes/File.php index 0cd31075d..0f230a6ee 100644 --- a/classes/File.php +++ b/classes/File.php @@ -302,6 +302,7 @@ class File extends Memcached_DataObject if(! isset($this->filename)){ $notEnclosureMimeTypes = array(null,'text/html','application/xhtml+xml'); + $mimetype = $this->mimetype; if($mimetype != null){ $mimetype = strtolower($this->mimetype); } -- cgit v1.2.3-54-g00ecf