From 7faf6ec75592764b829b1954352dfaf0329cdbf0 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sun, 7 Mar 2010 23:41:55 -0500 Subject: add a script to import Twitter atom feed as notices --- scripts/importtwitteratom.php | 192 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 scripts/importtwitteratom.php (limited to 'scripts') diff --git a/scripts/importtwitteratom.php b/scripts/importtwitteratom.php new file mode 100644 index 000000000..7316f2108 --- /dev/null +++ b/scripts/importtwitteratom.php @@ -0,0 +1,192 @@ +#!/usr/bin/env php +. + */ + +define('INSTALLDIR', realpath(dirname(__FILE__) . '/..')); + +$shortoptions = 'i:n:f:'; +$longoptions = array('id=', 'nickname=', 'file='); + +$helptext = <<documentElement->namespaceURI != Activity::ATOM || + $dom->documentElement->localName != 'feed') { + throw new Exception("'$filename' is not an Atom feed."); + } + + return $dom; +} + +function importActivityStream($user, $doc) +{ + $feed = $doc->documentElement; + + $entries = $feed->getElementsByTagNameNS(Activity::ATOM, 'entry'); + + for ($i = $entries->length - 1; $i >= 0; $i--) { + $entry = $entries->item($i); + $activity = new Activity($entry, $feed); + $object = $activity->object; + if (!have_option('q', 'quiet')) { + print $activity->content . "\n"; + } + $html = getTweetHtml($object->link); + + $config = array('safe' => 1, + 'deny_attribute' => 'class,rel,id,style,on*'); + + $html = htmLawed($html, $config); + + $content = html_entity_decode(strip_tags($html)); + + $notice = Notice::saveNew($user->id, + $content, + 'importtwitter', + array('uri' => $object->id, + 'url' => $object->link, + 'rendered' => $html, + 'created' => common_sql_date($activity->time), + 'replies' => array(), + 'groups' => array())); + } +} + +function getTweetHtml($url) +{ + try { + $client = new HTTPClient(); + $response = $client->get($url); + } catch (HTTP_Request2_Exception $e) { + print "ERROR: HTTP response " . $e->getMessage() . "\n"; + return false; + } + + if (!$response->isOk()) { + print "ERROR: HTTP response " . $response->getCode() . "\n"; + return false; + } + + $body = $response->getBody(); + + return tweetHtmlFromBody($body); +} + +function tweetHtmlFromBody($body) +{ + $doc = DOMDocument::loadHTML($body); + $xpath = new DOMXPath($doc); + + $spans = $xpath->query('//span[@class="entry-content"]'); + + if ($spans->length == 0) { + print "ERROR: No content in tweet page.\n"; + return ''; + } + + $span = $spans->item(0); + + $children = $span->childNodes; + + $text = ''; + + for ($i = 0; $i < $children->length; $i++) { + $child = $children->item($i); + if ($child instanceof DOMElement && + $child->tagName == 'a' && + !preg_match('#^https?://#', $child->getAttribute('href'))) { + $child->setAttribute('href', 'http://twitter.com' . $child->getAttribute('href')); + } + $text .= $doc->saveXML($child); + } + + return $text; +} + +try { + + $doc = getAtomFeedDocument(); + $user = getUser(); + + importActivityStream($user, $doc); + +} catch (Exception $e) { + print $e->getMessage()."\n"; + exit(1); +} + -- cgit v1.2.3-54-g00ecf