diff options
author | Brion Vibber <brion@pobox.com> | 2010-03-22 13:56:16 -0700 |
---|---|---|
committer | Brion Vibber <brion@pobox.com> | 2010-03-22 13:56:16 -0700 |
commit | e89908f26140c217e01b2f8f755712f38f3935f3 (patch) | |
tree | 6b241fc2e33f3528cf48b415ef67906826b02e24 /scripts/importtwitteratom.php | |
parent | 714d920faea302b55857cc3bec4e9e6160ea136a (diff) | |
parent | eb563937df921e5fc67ca0c87e229feb2907fd19 (diff) |
Merge branch '0.9.x' of git@gitorious.org:statusnet/mainline into 1.0.x
Conflicts:
lib/channel.php
scripts/imdaemon.php
Diffstat (limited to 'scripts/importtwitteratom.php')
-rw-r--r-- | scripts/importtwitteratom.php | 192 |
1 files changed, 192 insertions, 0 deletions
diff --git a/scripts/importtwitteratom.php b/scripts/importtwitteratom.php new file mode 100644 index 000000000..7316f2108 --- /dev/null +++ b/scripts/importtwitteratom.php @@ -0,0 +1,192 @@ +#!/usr/bin/env php +<?php +/* + * StatusNet - the distributed open-source microblogging tool + * Copyright (C) 2010 StatusNet, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +define('INSTALLDIR', realpath(dirname(__FILE__) . '/..')); + +$shortoptions = 'i:n:f:'; +$longoptions = array('id=', 'nickname=', 'file='); + +$helptext = <<<END_OF_IMPORTTWITTERATOM_HELP +importtwitteratom.php [options] +import an Atom feed from Twitter as notices by a user + + -i --id ID of user to update + -n --nickname nickname of the user to update + -f --file file to import (Atom-only for now) + +END_OF_IMPORTTWITTERATOM_HELP; + +require_once INSTALLDIR.'/scripts/commandline.inc'; +require_once INSTALLDIR.'/extlib/htmLawed/htmLawed.php'; + +function getUser() +{ + $user = null; + + if (have_option('i', 'id')) { + $id = get_option_value('i', 'id'); + $user = User::staticGet('id', $id); + if (empty($user)) { + throw new Exception("Can't find user with id '$id'."); + } + } else if (have_option('n', 'nickname')) { + $nickname = get_option_value('n', 'nickname'); + $user = User::staticGet('nickname', $nickname); + if (empty($user)) { + throw new Exception("Can't find user with nickname '$nickname'"); + } + } else { + show_help(); + exit(1); + } + + return $user; +} + +function getAtomFeedDocument() +{ + $filename = get_option_value('f', 'file'); + + if (empty($filename)) { + show_help(); + exit(1); + } + + if (!file_exists($filename)) { + throw new Exception("No such file '$filename'."); + } + + if (!is_file($filename)) { + throw new Exception("Not a regular file: '$filename'."); + } + + if (!is_readable($filename)) { + throw new Exception("File '$filename' not readable."); + } + + $xml = file_get_contents($filename); + + $dom = DOMDocument::loadXML($xml); + + if ($dom->documentElement->namespaceURI != Activity::ATOM || + $dom->documentElement->localName != 'feed') { + throw new Exception("'$filename' is not an Atom feed."); + } + + return $dom; +} + +function importActivityStream($user, $doc) +{ + $feed = $doc->documentElement; + + $entries = $feed->getElementsByTagNameNS(Activity::ATOM, 'entry'); + + for ($i = $entries->length - 1; $i >= 0; $i--) { + $entry = $entries->item($i); + $activity = new Activity($entry, $feed); + $object = $activity->object; + if (!have_option('q', 'quiet')) { + print $activity->content . "\n"; + } + $html = getTweetHtml($object->link); + + $config = array('safe' => 1, + 'deny_attribute' => 'class,rel,id,style,on*'); + + $html = htmLawed($html, $config); + + $content = html_entity_decode(strip_tags($html)); + + $notice = Notice::saveNew($user->id, + $content, + 'importtwitter', + array('uri' => $object->id, + 'url' => $object->link, + 'rendered' => $html, + 'created' => common_sql_date($activity->time), + 'replies' => array(), + 'groups' => array())); + } +} + +function getTweetHtml($url) +{ + try { + $client = new HTTPClient(); + $response = $client->get($url); + } catch (HTTP_Request2_Exception $e) { + print "ERROR: HTTP response " . $e->getMessage() . "\n"; + return false; + } + + if (!$response->isOk()) { + print "ERROR: HTTP response " . $response->getCode() . "\n"; + return false; + } + + $body = $response->getBody(); + + return tweetHtmlFromBody($body); +} + +function tweetHtmlFromBody($body) +{ + $doc = DOMDocument::loadHTML($body); + $xpath = new DOMXPath($doc); + + $spans = $xpath->query('//span[@class="entry-content"]'); + + if ($spans->length == 0) { + print "ERROR: No content in tweet page.\n"; + return ''; + } + + $span = $spans->item(0); + + $children = $span->childNodes; + + $text = ''; + + for ($i = 0; $i < $children->length; $i++) { + $child = $children->item($i); + if ($child instanceof DOMElement && + $child->tagName == 'a' && + !preg_match('#^https?://#', $child->getAttribute('href'))) { + $child->setAttribute('href', 'http://twitter.com' . $child->getAttribute('href')); + } + $text .= $doc->saveXML($child); + } + + return $text; +} + +try { + + $doc = getAtomFeedDocument(); + $user = getUser(); + + importActivityStream($user, $doc); + +} catch (Exception $e) { + print $e->getMessage()."\n"; + exit(1); +} + |