From 3b7ee5a5f9a2cd3066aff8a7a12d09878be3b06c Mon Sep 17 00:00:00 2001 From: Robin Millette Date: Wed, 13 May 2009 14:27:32 -0400 Subject: rewrote short url stuff to handle new file/url classes (redirections, oembed, mimetypes, etc.) --- classes/File.php | 65 ++++++++++++ classes/File_oembed.php | 43 ++++++++ classes/File_redirection.php | 231 +++++++++++++++++++++++++++++++++++++++++++ classes/File_to_post.php | 17 ++++ classes/Notice.php | 6 +- classes/laconica.ini | 64 ------------ classes/laconica.links.ini | 59 +---------- 7 files changed, 360 insertions(+), 125 deletions(-) (limited to 'classes') diff --git a/classes/File.php b/classes/File.php index 8dd017b79..2ddc5deb8 100644 --- a/classes/File.php +++ b/classes/File.php @@ -20,6 +20,11 @@ if (!defined('LACONICA')) { exit(1); } require_once INSTALLDIR.'/classes/Memcached_DataObject.php'; +require_once INSTALLDIR.'/classes/File_redirection.php'; +require_once INSTALLDIR.'/classes/File_oembed.php'; +require_once INSTALLDIR.'/classes/File_thumbnail.php'; +require_once INSTALLDIR.'/classes/File_to_post.php'; +//require_once INSTALLDIR.'/classes/File_redirection.php'; /** * Table Definition for file @@ -44,4 +49,64 @@ class File extends Memcached_DataObject /* the code above is auto generated do not remove the tag below */ ###END_AUTOCODE + + function isProtected($url) { + return 'http://www.facebook.com/login.php' === $url; + } + + function saveNew($redir_data, $given_url) { + $x = new File; + $x->url = $given_url; + if (!empty($redir_data['protected'])) $x->protected = $redir_data['protected']; + if (!empty($redir_data['title'])) $x->title = $redir_data['title']; + if (!empty($redir_data['type'])) $x->mimetype = $redir_data['type']; + if (!empty($redir_data['size'])) $x->size = intval($redir_data['size']); + if (isset($redir_data['time']) && $redir_data['time'] > 0) $x->date = intval($redir_data['time']); + $file_id = $x->insert(); + + if (isset($redir_data['type']) + && ('text/html' === substr($redir_data['type'], 0, 9)) + && ($oembed_data = File_oembed::_getOembed($given_url)) + && isset($oembed_data['json'])) { + + File_oembed::saveNew($oembed_data['json'], $file_id); + } + return $x; + } + + function processNew($given_url, $notice_id) { + if (empty($given_url)) return -1; // error, no url to process + $given_url = File_redirection::_canonUrl($given_url); + if (empty($given_url)) return -1; // error, no url to process + $file = File::staticGet('url', $given_url); + if (empty($file->id)) { + $file_redir = File_redirection::staticGet('url', $given_url); + if (empty($file_redir->id)) { + $redir_data = File_redirection::where($given_url); + $redir_url = $redir_data['url']; + if ($redir_url === $given_url) { + $x = File::saveNew($redir_data, $given_url); + $file_id = $x->id; + + } else { + $x = File::processNew($redir_url, $notice_id); + $file_id = $x->id; + File_redirection::saveNew($redir_data, $file_id, $given_url); + } + } else { + $file_id = $file_redir->file_id; + } + } else { + $file_id = $file->id; + $x = $file; + } + + if (empty($x)) { + $x = File::staticGet($file_id); + if (empty($x)) die('Impossible!'); + } + + File_to_post::processNew($file_id, $notice_id); + return $x; + } } diff --git a/classes/File_oembed.php b/classes/File_oembed.php index 33dd8200c..2846f49db 100644 --- a/classes/File_oembed.php +++ b/classes/File_oembed.php @@ -50,4 +50,47 @@ class File_oembed extends Memcached_DataObject /* the code above is auto generated do not remove the tag below */ ###END_AUTOCODE + + + function _getOembed($url, $maxwidth = 500, $maxheight = 400, $format = 'json') { + $cmd = 'http://oohembed.com/oohembed/?url=' . urlencode($url); + if (is_int($maxwidth)) $cmd .= "&maxwidth=$maxwidth"; + if (is_int($maxheight)) $cmd .= "&maxheight=$maxheight"; + if (is_string($format)) $cmd .= "&format=$format"; + $oe = @file_get_contents($cmd); + if (false === $oe) return false; + return array($format => (('json' === $format) ? json_decode($oe, true) : $oe)); + } + + function saveNew($data, $file_id) { + $file_oembed = new File_oembed; + $file_oembed->file_id = $file_id; + $file_oembed->version = $data['version']; + $file_oembed->type = $data['type']; + if (!empty($data['provider_name'])) $file_oembed->provider = $data['provider_name']; + if (!isset($file_oembed->provider) && !empty($data['provide'])) $file_oembed->provider = $data['provider']; + if (!empty($data['provide_url'])) $file_oembed->provider_url = $data['provider_url']; + if (!empty($data['width'])) $file_oembed->width = intval($data['width']); + if (!empty($data['height'])) $file_oembed->height = intval($data['height']); + if (!empty($data['html'])) $file_oembed->html = $data['html']; + if (!empty($data['title'])) $file_oembed->title = $data['title']; + if (!empty($data['author_name'])) $file_oembed->author_name = $data['author_name']; + if (!empty($data['author_url'])) $file_oembed->author_url = $data['author_url']; + if (!empty($data['url'])) $file_oembed->url = $data['url']; + $file_oembed->insert(); + + if (!empty($data['thumbnail_url'])) { + $tn = new File_thumbnail; + $tn->file_id = $file_id; + $tn->url = $data['thumbnail_url']; + $tn->width = intval($data['thumbnail_width']); + $tn->height = intval($data['thumbnail_height']); + $tn->insert(); + } + + + + } } + + diff --git a/classes/File_redirection.php b/classes/File_redirection.php index e2d1e69c3..a71d1c083 100644 --- a/classes/File_redirection.php +++ b/classes/File_redirection.php @@ -20,6 +20,11 @@ if (!defined('LACONICA')) { exit(1); } require_once INSTALLDIR.'/classes/Memcached_DataObject.php'; +require_once INSTALLDIR.'/classes/File.php'; +require_once INSTALLDIR.'/classes/File_oembed.php'; + +define('USER_AGENT', 'Laconica user agent / file probe'); + /** * Table Definition for file_redirection @@ -42,4 +47,230 @@ class File_redirection extends Memcached_DataObject /* the code above is auto generated do not remove the tag below */ ###END_AUTOCODE + + + + function _commonCurl($url, $redirs) { + $curlh = curl_init(); + curl_setopt($curlh, CURLOPT_URL, $url); + curl_setopt($curlh, CURLOPT_AUTOREFERER, true); // # setup referer header when folowing redirects + curl_setopt($curlh, CURLOPT_CONNECTTIMEOUT, 10); // # seconds to wait + curl_setopt($curlh, CURLOPT_MAXREDIRS, $redirs); // # max number of http redirections to follow + curl_setopt($curlh, CURLOPT_USERAGENT, USER_AGENT); + curl_setopt($curlh, CURLOPT_FOLLOWLOCATION, true); // Follow redirects + curl_setopt($curlh, CURLOPT_RETURNTRANSFER, true); + curl_setopt($curlh, CURLOPT_FILETIME, true); + curl_setopt($curlh, CURLOPT_HEADER, true); // Include header in output + return $curlh; + } + + function _redirectWhere_imp($short_url, $redirs = 10, $protected = false) { + if ($redirs < 0) return false; + + // let's see if we know this... + $a = File::staticGet('url', $short_url); + if (empty($a->id)) { + $b = File_redirection::staticGet('url', $short_url); + if (empty($b->id)) { + // we'll have to figure it out + } else { + // this is a redirect to $b->file_id + $a = File::staticGet($b->file_id); + $url = $a->url; + } + } else { + // this is a direct link to $a->url + $url = $a->url; + } + if (isset($url)) { + return $url; + } + + + + $curlh = File_redirection::_commonCurl($short_url, $redirs); + // Don't include body in output + curl_setopt($curlh, CURLOPT_NOBODY, true); + curl_exec($curlh); + $info = curl_getinfo($curlh); + curl_close($curlh); + + if (405 == $info['http_code']) { + $curlh = File_redirection::_commonCurl($short_url, $redirs); + curl_exec($curlh); + $info = curl_getinfo($curlh); + curl_close($curlh); + } + + if (!empty($info['redirect_count']) && File::isProtected($info['url'])) { + return File_redirection::_redirectWhere_imp($short_url, $info['redirect_count'] - 1, true); + } + + $ret = array('code' => $info['http_code'] + , 'redirects' => $info['redirect_count'] + , 'url' => $info['url']); + + if (!empty($info['content_type'])) $ret['type'] = $info['content_type']; + if ($protected) $ret['protected'] = true; + if (!empty($info['download_content_length'])) $ret['size'] = $info['download_content_length']; + if (isset($info['filetime']) && ($info['filetime'] > 0)) $ret['time'] = $info['filetime']; + return $ret; + } + + function where($in_url) { + $ret = File_redirection::_redirectWhere_imp($in_url); + return $ret; + } + + function makeShort($long_url) { + $long_url = File_redirection::_canonUrl($long_url); + // do we already know this long_url and have a short redirection for it? + $file = new File; + $file_redir = new File_redirection; + $file->url = $long_url; + $file->joinAdd($file_redir); + $file->selectAdd('length(file_redirection.url) as len'); + $file->limit(1); + $file->orderBy('len'); + $file->find(true); + if (!empty($file->id)) { + return $file->url; + } + + // if yet unknown, we must find a short url according to user settings + $short_url = File_redirection::_userMakeShort($long_url, common_current_user()); + return $short_url; + } + + function _userMakeShort($long_url, $user) { + if (empty($user)) { + // common current user does not find a user when called from the XMPP daemon + // therefore we'll set one here fix, so that XMPP given URLs may be shortened + $user->urlshorteningservice = 'ur1.ca'; + } + $curlh = curl_init(); + curl_setopt($curlh, CURLOPT_CONNECTTIMEOUT, 20); // # seconds to wait + curl_setopt($curlh, CURLOPT_USERAGENT, 'Laconica'); + curl_setopt($curlh, CURLOPT_RETURNTRANSFER, true); + + switch($user->urlshorteningservice) { + case 'ur1.ca': + require_once INSTALLDIR.'/lib/Shorturl_api.php'; + $short_url_service = new LilUrl; + $short_url = $short_url_service->shorten($long_url); + break; + + case '2tu.us': + $short_url_service = new TightUrl; + require_once INSTALLDIR.'/lib/Shorturl_api.php'; + $short_url = $short_url_service->shorten($long_url); + break; + + case 'ptiturl.com': + require_once INSTALLDIR.'/lib/Shorturl_api.php'; + $short_url_service = new PtitUrl; + $short_url = $short_url_service->shorten($long_url); + break; + + case 'bit.ly': + curl_setopt($curlh, CURLOPT_URL, 'http://bit.ly/api?method=shorten&long_url='.urlencode($long_url)); + $short_url = current(json_decode(curl_exec($curlh))->results)->hashUrl; + break; + + case 'is.gd': + curl_setopt($curlh, CURLOPT_URL, 'http://is.gd/api.php?longurl='.urlencode($long_url)); + $short_url = curl_exec($curlh); + break; + case 'snipr.com': + curl_setopt($curlh, CURLOPT_URL, 'http://snipr.com/site/snip?r=simple&link='.urlencode($long_url)); + $short_url = curl_exec($curlh); + break; + case 'metamark.net': + curl_setopt($curlh, CURLOPT_URL, 'http://metamark.net/api/rest/simple?long_url='.urlencode($long_url)); + $short_url = curl_exec($curlh); + break; + case 'tinyurl.com': + curl_setopt($curlh, CURLOPT_URL, 'http://tinyurl.com/api-create.php?url='.urlencode($long_url)); + $short_url = curl_exec($curlh); + break; + default: + $short_url = false; + } + + curl_close($curlh); + + if ($short_url) { + $short_url = (string)$short_url; +if(1) { + // store it + $file = File::staticGet('url', $long_url); + if (empty($file)) { + $redir_data = File_redirection::where($long_url); + $file = File::saveNew($redir_data, $long_url); + $file_id = $file->id; + if (!empty($redir_data['oembed']['json'])) { + File_oembed::saveNew($redir_data['oembed']['json'], $file_id); + } + } else { + $file_id = $file->id; + } + $file_redir = File_redirection::staticGet('url', $short_url); + if (empty($file_redir)) { + $file_redir = new File_redirection; + $file_redir->url = $short_url; + $file_redir->file_id = $file_id; + $file_redir->insert(); + } } + return $short_url; + } + return $long_url; + } + + function _canonUrl($in_url, $default_scheme = 'http://') { + if (empty($in_url)) return false; + $out_url = $in_url; + $p = parse_url($out_url); + if (empty($p['host']) || empty($p['scheme'])) { + list($scheme) = explode(':', $in_url, 2); + switch ($scheme) { + case 'fax': + case 'tel': + $out_url = str_replace('.-()', '', $out_url); + break; + + case 'mailto': + case 'aim': + case 'jabber': + case 'xmpp': + // don't touch anything + break; + + default: + $out_url = $default_scheme . ltrim($out_url, '/'); + $p = parse_url($out_url); + if (empty($p['scheme'])) return false; + break; + } + } + + if (('ftp' == $p['scheme']) || ('http' == $p['scheme']) || ('https' == $p['scheme'])) { + if (empty($p['host'])) return false; + if (empty($p['path'])) { + $out_url .= '/'; + } + } + + return $out_url; + } + + function saveNew($data, $file_id, $url) { + $file_redir = new File_redirection; + $file_redir->url = $url; + $file_redir->file_id = $file_id; + $file_redir->redirections = intval($data['redirects']); + $file_redir->httpcode = intval($data['code']); + $file_redir->insert(); + } +} + diff --git a/classes/File_to_post.php b/classes/File_to_post.php index bd0528d98..00ddebe6b 100644 --- a/classes/File_to_post.php +++ b/classes/File_to_post.php @@ -40,4 +40,21 @@ class File_to_post extends Memcached_DataObject /* the code above is auto generated do not remove the tag below */ ###END_AUTOCODE + + function processNew($file_id, $notice_id) { + static $seen = array(); + if (empty($seen[$notice_id]) || !in_array($file_id, $seen[$notice_id])) { + $f2p = new File_to_post; + $f2p->file_id = $file_id; + $f2p->post_id = $notice_id; + $f2p->insert(); + if (empty($seen[$notice_id])) { + $seen[$notice_id] = array($file_id); + } else { + $seen[$notice_id][] = $file_id; + } + } + + } } + diff --git a/classes/Notice.php b/classes/Notice.php index 382d160ab..c2fa2d19e 100644 --- a/classes/Notice.php +++ b/classes/Notice.php @@ -124,7 +124,7 @@ class Notice extends Memcached_DataObject $profile = Profile::staticGet($profile_id); - $final = common_shorten_links($content); +// $final = common_shorten_links($content); if (!$profile) { common_log(LOG_ERR, 'Problem saving notice. Unknown user.'); @@ -167,8 +167,8 @@ class Notice extends Memcached_DataObject $notice->reply_to = $reply_to; $notice->created = common_sql_now(); - $notice->content = $final; - $notice->rendered = common_render_content($final, $notice); + $notice->content = $content; + $notice->rendered = common_render_content($content, $notice); $notice->source = $source; $notice->uri = $uri; diff --git a/classes/laconica.ini b/classes/laconica.ini index 00a1b8936..316923af0 100644 --- a/classes/laconica.ini +++ b/classes/laconica.ini @@ -1,67 +1,3 @@ - -[file] -id = 129 -url = 2 -mimetype = 2 -size = 1 -title = 2 -date = 1 -protected = 17 - -[file__keys] -id = K -url = U - -[file_oembed] -id = 129 -file_id = 1 -version = 2 -type = 2 -provider = 2 -provider_url = 2 -width = 1 -height = 1 -html = 66 -title = 2 -author_name = 2 -author_url = 2 -url = 2 - -[file_oembed__keys] -id = K -file_id = U - -[file_redirection] -id = 129 -url = 2 -file_id = 1 -redirections = 1 -httpcode = 1 - -[file_redirection__keys] -id = K -url = U - -[file_thumbnail] -id = 129 -file_id = 1 -url = 2 -width = 1 -height = 1 - -[file_thumbnail__keys] -id = K -file_id = U -url = U - -[file_to_post] -id = 129 -file_id = 1 -post_id = 1 - -[file_to_post__keys] -id = K - [avatar] profile_id = 129 original = 17 diff --git a/classes/laconica.links.ini b/classes/laconica.links.ini index bc52ce578..95c63f3c0 100644 --- a/classes/laconica.links.ini +++ b/classes/laconica.links.ini @@ -53,62 +53,5 @@ file_id = file:id [file_to_post] file_id = file:id -post_id = post:id - -[avatar] -profile_id = profile:id - -[user] -id = profile:id -carrier = sms_carrier:id - -[remote_profile] -id = profile:id - -[notice] -profile_id = profile:id -reply_to = notice:id - -[reply] -notice_id = notice:id -profile_id = profile:id - -[token] -consumer_key = consumer:consumer_key - -[nonce] -consumer_key,token = token:consumer_key,token - -[user_openid] -user_id = user:id - -[confirm_address] -user_id = user:id - -[remember_me] -user_id = user:id - -[queue_item] -notice_id = notice:id - -[subscription] -subscriber = profile:id -subscribed = profile:id - -[fave] -notice_id = notice:id -user_id = user:id - -[file_oembed] -file_id = file:id - -[file_redirection] -file_id = file:id - -[file_thumbnail] -file_id = file:id - -[file_to_post] -file_id = file:id -post_id = post:id +post_id = notice:id -- cgit v1.2.3-54-g00ecf