From 6d29592ec7a37f907256c18aff4afe9cab74d987 Mon Sep 17 00:00:00 2001 From: Zach Copley Date: Thu, 6 Aug 2009 01:15:08 +0000 Subject: Abstract out the parallelizing daemon stuff --- lib/parallelizingdaemon.php | 225 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 225 insertions(+) create mode 100644 lib/parallelizingdaemon.php (limited to 'lib/parallelizingdaemon.php') diff --git a/lib/parallelizingdaemon.php b/lib/parallelizingdaemon.php new file mode 100644 index 000000000..5ecfd98f3 --- /dev/null +++ b/lib/parallelizingdaemon.php @@ -0,0 +1,225 @@ +. + * + * @category Daemon + * @package Laconica + * @author Zach Copley + * @author Evan Prodromou + * @copyright 2009 Control Yourself, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://laconi.ca/ + */ + +if (!defined('LACONICA')) { + exit(1); +} + +declare(ticks = 1); + +/** + * Daemon able to spawn multiple child processes to do work in parallel + * + * @category Daemon + * @package Laconica + * @author Zach Copley + * @author Evan Prodromou + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://laconi.ca/ + */ + +class ParallelizingDaemon extends Daemon +{ + private $_children = array(); + private $_interval = 0; // seconds + private $_max_children = 0; // maximum number of children + private $_debug = false; + + /** + * Constructor + * + * @param string $id the name/id of this daemon + * @param int $interval sleep this long before doing everything again + * @param int $max_children maximum number of child processes at a time + * @param boolean $debug debug output flag + * + * @return void + * + **/ + + function __construct($id = null, $interval = 60, $max_children = 2, + $debug = null) + { + parent::__construct(true); // daemonize + + $this->_interval = $interval; + $this->_max_children = $max_children; + $this->_debug = $debug; + + if (isset($id)) { + $this->set_id($id); + } + } + + /** + * Run the daemon + * + * @return void + */ + + function run() + { + if (isset($this->_debug)) { + echo $this->name() . " - debugging output enabled.\n"; + } + + do { + + $objects = $this->getObjects(); + + foreach ($objects as $o) { + + // Fork a child for each object + + $pid = pcntl_fork(); + + if ($pid == -1) { + die ($this->name() . ' - Couldn\'t fork!'); + } + + if ($pid) { + + // Parent + if (isset($this->_debug)) { + echo $this->name() . + " (parent) forked new child - pid $pid.\n"; + + } + + $this->_children[] = $pid; + + } else { + + // Child + + // Do something with each object + $this->childTask($o); + + exit(); + } + + // Remove child from ps list as it finishes + while (($c = pcntl_wait($status, WNOHANG OR WUNTRACED)) > 0) { + + if (isset($this->_debug)) { + echo $this->name() . " child $c finished.\n"; + } + + $this->removePs($this->_children, $c); + } + + // Wait! We have too many damn kids. + if (sizeof($this->_children) >= $this->_max_children) { + + if (isset($this->_debug)) { + echo $this->name() . " - Too many children. Waiting...\n"; + } + + if (($c = pcntl_wait($status, WUNTRACED)) > 0) { + + if (isset($this->_debug)) { + echo $this->name() . + " - Finished waiting for child $c.\n"; + } + + $this->removePs($this->_children, $c); + } + } + } + + // Remove all children from the process list before restarting + while (($c = pcntl_wait($status, WUNTRACED)) > 0) { + + if (isset($this->_debug)) { + echo $this->name() . " child $c finished.\n"; + } + + $this->removePs($this->_children, $c); + } + + // Rest for a bit + + if (isset($this->_debug)) { + echo $this->name() . ' - Waiting ' . $this->_interval . + " secs before running again.\n"; + } + + if ($this->_interval > 0) { + sleep($this->_interval); + } + + } while (true); + } + + /** + * Remove a child process from the list of children + * + * @param array &$plist array of processes + * @param int $ps process id + * + * @return void + */ + + function removePs(&$plist, $ps) + { + for ($i = 0; $i < sizeof($plist); $i++) { + if ($plist[$i] == $ps) { + unset($plist[$i]); + $plist = array_values($plist); + break; + } + } + } + + /** + * Get a list of objects to work on in parallel + * + * @return array An array of objects to work on + */ + + function getObjects() + { + die('Implement ParallelizingDaemon::getObjects().'); + } + + /** + * Do something with each object in parallel + * + * @param mixed $object data to work on + * + * @return void + */ + + function childTask($object) + { + die("Implement ParallelizingDaemon::childTask($object)."); + } + +} \ No newline at end of file -- cgit v1.2.3-54-g00ecf From c03d5932877c15eb673febabda5227df2173fbad Mon Sep 17 00:00:00 2001 From: Zach Copley Date: Thu, 6 Aug 2009 22:52:58 +0000 Subject: Make TwitterStatusFetcher extend ParallelizingDaemon --- lib/parallelizingdaemon.php | 12 +- scripts/synctwitterfriends.php | 20 ++- scripts/twitterstatusfetcher.php | 289 ++++++++++++--------------------------- 3 files changed, 110 insertions(+), 211 deletions(-) (limited to 'lib/parallelizingdaemon.php') diff --git a/lib/parallelizingdaemon.php b/lib/parallelizingdaemon.php index 5ecfd98f3..dc28b5643 100644 --- a/lib/parallelizingdaemon.php +++ b/lib/parallelizingdaemon.php @@ -87,7 +87,7 @@ class ParallelizingDaemon extends Daemon function run() { if (isset($this->_debug)) { - echo $this->name() . " - debugging output enabled.\n"; + echo $this->name() . " - Debugging output enabled.\n"; } do { @@ -107,9 +107,10 @@ class ParallelizingDaemon extends Daemon if ($pid) { // Parent + if (isset($this->_debug)) { echo $this->name() . - " (parent) forked new child - pid $pid.\n"; + " - Forked new child - pid $pid.\n"; } @@ -120,22 +121,25 @@ class ParallelizingDaemon extends Daemon // Child // Do something with each object + $this->childTask($o); exit(); } // Remove child from ps list as it finishes + while (($c = pcntl_wait($status, WNOHANG OR WUNTRACED)) > 0) { if (isset($this->_debug)) { - echo $this->name() . " child $c finished.\n"; + echo $this->name() . " - Child $c finished.\n"; } $this->removePs($this->_children, $c); } // Wait! We have too many damn kids. + if (sizeof($this->_children) >= $this->_max_children) { if (isset($this->_debug)) { @@ -158,7 +162,7 @@ class ParallelizingDaemon extends Daemon while (($c = pcntl_wait($status, WUNTRACED)) > 0) { if (isset($this->_debug)) { - echo $this->name() . " child $c finished.\n"; + echo $this->name() . " - Child $c finished.\n"; } $this->removePs($this->_children, $c); diff --git a/scripts/synctwitterfriends.php b/scripts/synctwitterfriends.php index 1bd75bac1..37f7842a1 100755 --- a/scripts/synctwitterfriends.php +++ b/scripts/synctwitterfriends.php @@ -54,6 +54,18 @@ require_once INSTALLDIR . '/lib/parallelizingdaemon.php'; class SyncTwitterFriendsDaemon extends ParallelizingDaemon { + /** + * Constructor + * + * @param string $id the name/id of this daemon + * @param int $interval sleep this long before doing everything again + * @param int $max_children maximum number of child processes at a time + * @param boolean $debug debug output flag + * + * @return void + * + **/ + function __construct($id = null, $interval = 60, $max_children = 2, $debug = null) { @@ -71,6 +83,12 @@ class SyncTwitterFriendsDaemon extends ParallelizingDaemon return ('synctwitterfriendsdaemon.' . $this->_id); } + /** + * Find all the Twitter foreign links for users who have requested + * automatically subscribing to their Twitter friends locally. + * + * @return array flinks an array of Foreign_link objects + */ function getObjects() { $flinks = array(); @@ -237,8 +255,6 @@ class SyncTwitterFriendsDaemon extends ParallelizingDaemon } -declare(ticks = 1); - $id = null; $debug = null; diff --git a/scripts/twitterstatusfetcher.php b/scripts/twitterstatusfetcher.php index 67f52a3cc..fa37f894c 100755 --- a/scripts/twitterstatusfetcher.php +++ b/scripts/twitterstatusfetcher.php @@ -56,17 +56,23 @@ require_once INSTALLDIR . '/lib/daemon.php'; // NOTE: an Avatar path MUST be set in config.php for this // script to work: e.g.: $config['avatar']['path'] = '/laconica/avatar'; -class TwitterStatusFetcher extends Daemon +class TwitterStatusFetcher extends ParallelizingDaemon { - private $_children = array(); - - function __construct($id=null, $daemonize=true) + /** + * Constructor + * + * @param string $id the name/id of this daemon + * @param int $interval sleep this long before doing everything again + * @param int $max_children maximum number of child processes at a time + * @param boolean $debug debug output flag + * + * @return void + * + **/ + function __construct($id = null, $interval = 60, + $max_children = 2, $debug = null) { - parent::__construct($daemonize); - - if ($id) { - $this->set_id($id); - } + parent::__construct($id, $interval, $max_children, $debug); } /** @@ -81,123 +87,13 @@ class TwitterStatusFetcher extends Daemon } /** - * Run the daemon - * - * @return void - */ - - function run() - { - if (defined('SCRIPT_DEBUG')) { - common_debug($this->name() . - ': debugging log output enabled.'); - } - - do { - - $flinks = $this->refreshFlinks(); - - foreach ($flinks as $f) { - - $pid = pcntl_fork(); - - if ($pid == -1) { - die ("Couldn't fork!"); - } - - if ($pid) { - - // Parent - if (defined('SCRIPT_DEBUG')) { - common_debug("Parent: forked new status ". - " fetcher process " . $pid); - } - - $this->_children[] = $pid; - - } else { - - // Child - - // Each child ps needs its own DB connection - - // Note: DataObject::getDatabaseConnection() creates - // a new connection if there isn't one already - - global $_DB_DATAOBJECT; - $conn = &$f->getDatabaseConnection(); - - $this->getTimeline($f); - - $conn->disconnect(); - - // XXX: Couldn't find a less brutal way to blow - // away a cached connection - - unset($_DB_DATAOBJECT['CONNECTIONS']); - - exit(); - } - - // Remove child from ps list as it finishes - while (($c = pcntl_wait($status, WNOHANG OR WUNTRACED)) > 0) { - - if (defined('SCRIPT_DEBUG')) { - common_debug("Child $c finished."); - } - - $this->removePs($this->_children, $c); - } - - // Wait! We have too many damn kids. - if (sizeof($this->_children) > MAXCHILDREN) { - - if (defined('SCRIPT_DEBUG')) { - common_debug('Too many children. Waiting...'); - } - - if (($c = pcntl_wait($status, WUNTRACED)) > 0) { - - if (defined('SCRIPT_DEBUG')) { - common_debug("Finished waiting for $c"); - } - - $this->removePs($this->_children, $c); - } - } - } - - // Remove all children from the process list before restarting - while (($c = pcntl_wait($status, WUNTRACED)) > 0) { - - if (defined('SCRIPT_DEBUG')) { - common_debug("Child $c finished."); - } - - $this->removePs($this->_children, $c); - } - - // Rest for a bit before we fetch more statuses - - if (defined('SCRIPT_DEBUG')) { - common_debug('Waiting ' . POLL_INTERVAL . - ' secs before hitting Twitter again.'); - } - - if (POLL_INTERVAL > 0) { - sleep(POLL_INTERVAL); - } - - } while (true); - } - - /** - * Refresh the foreign links for this user + * Find all the Twitter foreign links for users who have requested + * importing of their friends' timelines * - * @return void + * @return array flinks an array of Foreign_link objects */ - function refreshFlinks() + function getObjects() { global $_DB_DATAOBJECT; @@ -205,15 +101,8 @@ class TwitterStatusFetcher extends Daemon $conn = &$flink->getDatabaseConnection(); $flink->service = TWITTER_SERVICE; - $flink->orderBy('last_noticesync'); - - $cnt = $flink->find(); - - if (defined('SCRIPT_DEBUG')) { - common_debug('Updating Twitter friends subscriptions' . - " for $cnt users."); - } + $flink->find(); $flinks = array(); @@ -234,39 +123,39 @@ class TwitterStatusFetcher extends Daemon return $flinks; } - /** - * Unknown - * - * @param array &$plist unknown. - * @param string $ps unknown. - * - * @return unknown - * @todo document - */ + function childTask($flink) { - function removePs(&$plist, $ps) - { - for ($i = 0; $i < sizeof($plist); $i++) { - if ($plist[$i] == $ps) { - unset($plist[$i]); - $plist = array_values($plist); - break; - } - } + // Each child ps needs its own DB connection + + // Note: DataObject::getDatabaseConnection() creates + // a new connection if there isn't one already + + $conn = &$flink->getDatabaseConnection(); + + $this->getTimeline($flink); + + $flink->last_friendsync = common_sql_now(); + $flink->update(); + + $conn->disconnect(); + + // XXX: Couldn't find a less brutal way to blow + // away a cached connection + + global $_DB_DATAOBJECT; + unset($_DB_DATAOBJECT['CONNECTIONS']); } function getTimeline($flink) { if (empty($flink)) { - common_log(LOG_WARNING, - "Can't retrieve Foreign_link for foreign ID $fid"); + common_log(LOG_WARNING, $this->name() . + " - Can't retrieve Foreign_link for foreign ID $fid"); return; } - if (defined('SCRIPT_DEBUG')) { - common_debug('Trying to get timeline for Twitter user ' . - $flink->foreign_id); - } + common_debug($this->name() . ' - Trying to get timeline for Twitter user ' . + $flink->foreign_id); // XXX: Biggest remaining issue - How do we know at which status // to start importing? How many statuses? Right now I'm going @@ -279,28 +168,28 @@ class TwitterStatusFetcher extends Daemon try { $timeline = $client->statuses_friends_timeline(); } catch (OAuthClientCurlException $e) { - common_log(LOG_WARNING, - 'OAuth client unable to get friends timeline for user ' . + common_log(LOG_WARNING, $this->name() . + ' - OAuth client unable to get friends timeline for user ' . $flink->user_id . ' - code: ' . $e->getCode() . 'msg: ' . $e->getMessage()); } if (empty($timeline)) { - common_log(LOG_WARNING, "Empty timeline."); + common_log(LOG_WARNING, $this->name . " - Empty timeline."); return; } // Reverse to preserve order + foreach (array_reverse($timeline) as $status) { // Hacktastic: filter out stuff coming from this Laconica + $source = mb_strtolower(common_config('integration', 'source')); if (preg_match("/$source/", mb_strtolower($status->source))) { - if (defined('SCRIPT_DEBUG')) { - common_debug('Skipping import of status ' . $status->id . - ' with source ' . $source); - } + common_debug($this->name() . ' - Skipping import of status ' . + $status->id . ' with source ' . $source); continue; } @@ -308,6 +197,7 @@ class TwitterStatusFetcher extends Daemon } // Okay, record the time we synced with Twitter for posterity + $flink->last_noticesync = common_sql_now(); $flink->update(); } @@ -319,8 +209,8 @@ class TwitterStatusFetcher extends Daemon $profile = Profile::staticGet($id); if (empty($profile)) { - common_log(LOG_ERR, - 'Problem saving notice. No associated Profile.'); + common_log(LOG_ERR, $this->name() . + ' - Problem saving notice. No associated Profile.'); return null; } @@ -344,7 +234,7 @@ class TwitterStatusFetcher extends Daemon $notice->content = common_shorten_links($status->text); // XXX $notice->rendered = common_render_content($notice->content, $notice); $notice->source = 'twitter'; - $notice->reply_to = null; // XXX lookup reply + $notice->reply_to = null; // XXX: lookup reply $notice->is_local = Notice::GATEWAY; if (Event::handle('StartNoticeSave', array(&$notice))) { @@ -370,24 +260,22 @@ class TwitterStatusFetcher extends Daemon function ensureProfile($user) { // check to see if there's already a profile for this user + $profileurl = 'http://twitter.com/' . $user->screen_name; $profile = Profile::staticGet('profileurl', $profileurl); if (!empty($profile)) { - if (defined('SCRIPT_DEBUG')) { - common_debug("Profile for $profile->nickname found."); - } + common_debug($this->name() . + " - Profile for $profile->nickname found."); // Check to see if the user's Avatar has changed - $this->checkAvatar($user, $profile); + $this->checkAvatar($user, $profile); return $profile->id; } else { - if (defined('SCRIPT_DEBUG')) { - common_debug('Adding profile and remote profile ' . - "for Twitter user: $profileurl"); - } + common_debug($this->name() . ' - Adding profile and remote profile ' . + "for Twitter user: $profileurl."); $profile = new Profile(); $profile->query("BEGIN"); @@ -409,6 +297,7 @@ class TwitterStatusFetcher extends Daemon } // check for remote profile + $remote_pro = Remote_profile::staticGet('uri', $profileurl); if (empty($remote_pro)) { @@ -448,23 +337,18 @@ class TwitterStatusFetcher extends Daemon $oldname = $profile->getAvatar(48)->filename; if ($newname != $oldname) { - - if (defined('SCRIPT_DEBUG')) { - common_debug('Avatar for Twitter user ' . - "$profile->nickname has changed."); - common_debug("old: $oldname new: $newname"); - } + common_debug($this->name() . ' - Avatar for Twitter user ' . + "$profile->nickname has changed."); + common_debug($this->name() . " - old: $oldname new: $newname"); $this->updateAvatars($twitter_user, $profile); } if ($this->missingAvatarFile($profile)) { - - if (defined('SCRIPT_DEBUG')) { - common_debug('Twitter user ' . $profile->nickname . - ' is missing one or more local avatars.'); - common_debug("old: $oldname new: $newname"); - } + common_debug($this->name() . ' - Twitter user ' . + $profile->nickname . + ' is missing one or more local avatars.'); + common_debug($this->name() ." - old: $oldname new: $newname"); $this->updateAvatars($twitter_user, $profile); } @@ -544,23 +428,20 @@ class TwitterStatusFetcher extends Daemon if ($this->fetchAvatar($url, $filename)) { $this->newAvatar($id, $size, $mediatype, $filename); } else { - common_log(LOG_WARNING, "Problem fetching Avatar: $url", __FILE__); + common_log(LOG_WARNING, $this->id() . + " - Problem fetching Avatar: $url"); } } } function updateAvatar($profile_id, $size, $mediatype, $filename) { - if (defined('SCRIPT_DEBUG')) { - common_debug("Updating avatar: $size"); - } + common_debug($this->name() . " - Updating avatar: $size"); $profile = Profile::staticGet($profile_id); if (empty($profile)) { - if (defined('SCRIPT_DEBUG')) { - common_debug("Couldn't get profile: $profile_id!"); - } + common_debug($this->name() . " - Couldn't get profile: $profile_id!"); return; } @@ -568,6 +449,7 @@ class TwitterStatusFetcher extends Daemon $avatar = $profile->getAvatar($sizes[$size]); // Delete the avatar, if present + if ($avatar) { $avatar->delete(); } @@ -605,9 +487,7 @@ class TwitterStatusFetcher extends Daemon $avatar->filename = $filename; $avatar->url = Avatar::url($filename); - if (defined('SCRIPT_DEBUG')) { - common_debug("new filename: $avatar->url"); - } + common_debug($this->name() . " - New filename: $avatar->url"); $avatar->created = common_sql_now(); @@ -618,9 +498,8 @@ class TwitterStatusFetcher extends Daemon return null; } - if (defined('SCRIPT_DEBUG')) { - common_debug("Saved new $size avatar for $profile_id."); - } + common_debug($this->name() . + " - Saved new $size avatar for $profile_id."); return $id; } @@ -633,13 +512,12 @@ class TwitterStatusFetcher extends Daemon $out = fopen($avatarfile, 'wb'); if (!$out) { - common_log(LOG_WARNING, "Couldn't open file $filename", __FILE__); + common_log(LOG_WARNING, $this->name() . + " - Couldn't open file $filename"); return false; } - if (defined('SCRIPT_DEBUG')) { - common_debug("Fetching avatar: $url"); - } + common_debug($this->name() . " - Fetching Twitter avatar: $url"); $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $url); @@ -656,7 +534,8 @@ class TwitterStatusFetcher extends Daemon } } -declare(ticks = 1); +$id = null; +$debug = null; if (have_option('i')) { $id = get_option_value('i'); @@ -669,9 +548,9 @@ if (have_option('i')) { } if (have_option('d') || have_option('debug')) { - define('SCRIPT_DEBUG', true); + $debug = true; } -$fetcher = new TwitterStatusFetcher($id); +$fetcher = new TwitterStatusFetcher($id, 60, 2, $debug); $fetcher->runOnce(); -- cgit v1.2.3-54-g00ecf