diff options
-rw-r--r-- | plugins/Sitemap/SitemapPlugin.php | 163 | ||||
-rw-r--r-- | plugins/Sitemap/Sitemap_notice_count.php | 287 | ||||
-rw-r--r-- | plugins/Sitemap/Sitemap_user_count.php | 284 | ||||
-rw-r--r-- | plugins/Sitemap/noticesitemap.php | 137 | ||||
-rw-r--r-- | plugins/Sitemap/sitemapaction.php | 95 | ||||
-rw-r--r-- | plugins/Sitemap/sitemapindex.php | 128 | ||||
-rw-r--r-- | plugins/Sitemap/usersitemap.php | 128 |
7 files changed, 1222 insertions, 0 deletions
diff --git a/plugins/Sitemap/SitemapPlugin.php b/plugins/Sitemap/SitemapPlugin.php new file mode 100644 index 000000000..7ef5f1aa9 --- /dev/null +++ b/plugins/Sitemap/SitemapPlugin.php @@ -0,0 +1,163 @@ +<?php +/** + * StatusNet - the distributed open-source microblogging tool + * Copyright (C) 2010, StatusNet, Inc. + * + * Creates a dynamic sitemap for a StatusNet site + * + * PHP version 5 + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * @category Sample + * @package StatusNet + * @author Evan Prodromou <evan@status.net> + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + // This check helps protect against security problems; + // your code file can't be executed directly from the web. + exit(1); +} + +/** + * Sitemap plugin + * + * @category Sample + * @package StatusNet + * @author Evan Prodromou <evan@status.net> + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0 + * @link http://status.net/ + */ + +class SitemapPlugin extends Plugin +{ + const USERS_PER_MAP = 50000; + const NOTICES_PER_MAP = 50000; + + /** + * Load related modules when needed + * + * @param string $cls Name of the class to be loaded + * + * @return boolean hook value; true means continue processing, false means stop. + */ + + function onAutoload($cls) + { + $dir = dirname(__FILE__); + + switch ($cls) + { + case 'Sitemap_user_count': + case 'Sitemap_notice_count': + require_once $dir . '/' . $cls . '.php'; + return false; + case 'SitemapindexAction': + case 'NoticesitemapAction': + case 'UsersitemapAction': + require_once $dir . '/' . strtolower(mb_substr($cls, 0, -6)) . '.php'; + return false; + case 'SitemapAction': + require_once $dir . '/' . strtolower($cls) . '.php'; + return false; + default: + return true; + } + } + + /** + * Add sitemap-related information at the end of robots.txt + * + * @param Action $action Action being run + * + * @return boolean hook value. + */ + + function onEndRobotsTxt($action) + { + $url = common_local_url('sitemapindex'); + + print "\nSitemap: $url\n"; + + return true; + } + + /** + * Map URLs to actions + * + * @param Net_URL_Mapper $m path-to-action mapper + * + * @return boolean hook value; true means continue processing, false means stop. + */ + + function onRouterInitialized($m) + { + $m->connect('sitemapindex.xml', + array('action' => 'sitemapindex')); + + $m->connect('/notice-sitemap-:year-:month-:day-:index.xml', + array('action' => 'noticesitemap'), + array('year' => '[0-9]{4}', + 'month' => '[01][0-9]', + 'day' => '[0123][0-9]', + 'index' => '[1-9][0-9]*')); + + $m->connect('/user-sitemap-:year-:month-:day-:index.xml', + array('action' => 'usersitemap'), + array('year' => '[0-9]{4}', + 'month' => '[01][0-9]', + 'day' => '[0123][0-9]', + 'index' => '[1-9][0-9]*')); + return true; + } + + /** + * Database schema setup + * + * We cache some data persistently to avoid overlong queries. + * + * @see Sitemap_user_count + * @see Sitemap_notice_count + * + * @return boolean hook value; true means continue processing, false means stop. + */ + + function onCheckSchema() + { + $schema = Schema::get(); + + $schema->ensureTable('sitemap_user_count', + array(new ColumnDef('registration_date', 'date', null, + true, 'PRI'), + new ColumnDef('user_count', 'integer'), + new ColumnDef('created', 'datetime', + null, false), + new ColumnDef('modified', 'timestamp'))); + + $schema->ensureTable('sitemap_notice_count', + array(new ColumnDef('notice_date', 'date', null, + true, 'PRI'), + new ColumnDef('notice_count', 'integer'), + new ColumnDef('created', 'datetime', + null, false), + new ColumnDef('modified', 'timestamp'))); + + return true; + } +} diff --git a/plugins/Sitemap/Sitemap_notice_count.php b/plugins/Sitemap/Sitemap_notice_count.php new file mode 100644 index 000000000..673417b78 --- /dev/null +++ b/plugins/Sitemap/Sitemap_notice_count.php @@ -0,0 +1,287 @@ +<?php +/** + * Data class for counting notice postings by date + * + * PHP version 5 + * + * @category Data + * @package StatusNet + * @author Evan Prodromou <evan@status.net> + * @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3 + * @link http://status.net/ + * + * StatusNet - the distributed open-source microblogging tool + * Copyright (C) 2010, StatusNet, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +require_once INSTALLDIR . '/classes/Memcached_DataObject.php'; + +/** + * Data class for counting notices by date + * + * We make a separate sitemap for each notice posted by date. + * To save ourselves some (not inconsiderable) processing effort, + * we cache this data in the sitemap_notice_count table. Each + * row represents a day since the site has been started, with a count + * of notices posted on that day. Since, after the end of the day, + * this number doesn't change, it's a good candidate for persistent caching. + * + * @category Data + * @package StatusNet + * @author Evan Prodromou <evan@status.net> + * @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3 + * @link http://status.net/ + * + * @see DB_DataObject + */ + +class Sitemap_notice_count extends Memcached_DataObject +{ + public $__table = 'sitemap_notice_count'; // table name + + public $notice_date; // date primary_key not_null + public $notice_count; // int(4) + public $created; + public $modified; + + /** + * Get an instance by key + * + * This is a utility method to get a single instance with a given key value. + * + * @param string $k Key to use to lookup (usually 'notice_id' for this class) + * @param mixed $v Value to lookup + * + * @return Sitemap_notice_count object found, or null for no hits + * + */ + + function staticGet($k, $v=null) + { + return Memcached_DataObject::staticGet('Sitemap_notice_count', $k, $v); + } + + /** + * return table definition for DB_DataObject + * + * DB_DataObject needs to know something about the table to manipulate + * instances. This method provides all the DB_DataObject needs to know. + * + * @return array array of column definitions + */ + + function table() + { + return array('notice_date' => DB_DATAOBJECT_DATE + DB_DATAOBJECT_NOTNULL, + 'notice_count' => DB_DATAOBJECT_INT, + 'created' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL, + 'modified' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL); + } + + /** + * return key definitions for DB_DataObject + * + * DB_DataObject needs to know about keys that the table has; this function + * defines them. + * + * @return array key definitions + */ + + function keys() + { + return array('notice_date' => 'K'); + } + + /** + * return key definitions for Memcached_DataObject + * + * Our caching system uses the same key definitions, but uses a different + * method to get them. + * + * @return array key definitions + */ + + function keyTypes() + { + return $this->keys(); + } + + static function getAll() + { + $noticeCounts = self::cacheGet('sitemap:notice:counts'); + + if ($noticeCounts === false) { + + $snc = new Sitemap_notice_count(); + $snc->orderBy('notice_date DESC'); + + // Fetch the first one to check up-to-date-itude + + $n = $snc->find(true); + + $today = self::today(); + $noticeCounts = array(); + + if (!$n) { // No counts saved yet + $noticeCounts = self::initializeCounts(); + } else if ($snc->notice_date < $today) { // There are counts but not up to today + $noticeCounts = self::fillInCounts($snc->notice_date); + } else if ($snc->notice_date == $today) { // Refresh today's + $noticeCounts[$today] = self::updateToday(); + } + + // starts with second-to-last date + + while ($snc->fetch()) { + $noticeCounts[$snc->notice_date] = $snc->notice_count; + } + + self::cacheSet('sitemap:notice:counts', $noticeCounts); + } + + return $noticeCounts; + } + + static function initializeCounts() + { + $firstDate = self::getFirstDate(); // awww + $today = self::today(); + + $counts = array(); + + for ($d = $firstDate; $d <= $today; $d = self::incrementDay($d)) { + $n = self::getCount($d); + self::insertCount($d, $n); + $counts[$d] = $n; + } + + return $counts; + } + + static function fillInCounts($lastDate) + { + $today = self::today(); + + $counts = array(); + + $n = self::getCount($lastDate); + self::updateCount($lastDate, $n); + + $counts[$lastDate] = $n; + + for ($d = self::incrementDay($lastDate); $d <= $today; $d = self::incrementDay($d)) { + $n = self::getCount($d); + self::insertCount($d, $n); + } + + return $counts; + } + + static function updateToday() + { + $today = self::today(); + + $n = self::getCount($today); + self::updateCount($today, $n); + + return $n; + } + + static function getCount($d) + { + $notice = new Notice(); + $notice->whereAdd('created BETWEEN "'.$d.' 00:00:00" AND "'.self::incrementDay($d).' 00:00:00"'); + $n = $notice->count(); + + return $n; + } + + static function insertCount($d, $n) + { + $snc = new Sitemap_notice_count(); + + $snc->notice_date = DB_DataObject_Cast::date($d); + + $snc->notice_count = $n; + $snc->created = common_sql_now(); + $snc->modified = $snc->created; + + if (!$snc->insert()) { + common_log(LOG_WARNING, "Could not save user counts for '$d'"); + } + } + + static function updateCount($d, $n) + { + $snc = Sitemap_notice_count::staticGet('notice_date', DB_DataObject_Cast::date($d)); + + if (empty($snc)) { + throw new Exception("No such registration date: $d"); + } + + $orig = clone($snc); + + $snc->notice_date = DB_DataObject_Cast::date($d); + + $snc->notice_count = $n; + $snc->created = common_sql_now(); + $snc->modified = $snc->created; + + if (!$snc->update($orig)) { + common_log(LOG_WARNING, "Could not save user counts for '$d'"); + } + } + + static function incrementDay($d) + { + $dt = self::dateStrToInt($d); + return self::dateIntToStr($dt + 24 * 60 * 60); + } + + static function dateStrToInt($d) + { + return strtotime($d.' 00:00:00'); + } + + static function dateIntToStr($dt) + { + return date('Y-m-d', $dt); + } + + static function getFirstDate() + { + $n = new Notice(); + + $n->selectAdd(); + $n->selectAdd('date(min(created)) as first_date'); + + if ($n->find(true)) { + return $n->first_date; + } else { + // Is this right? + return self::dateIntToStr(time()); + } + } + + static function today() + { + return self::dateIntToStr(time()); + } +} diff --git a/plugins/Sitemap/Sitemap_user_count.php b/plugins/Sitemap/Sitemap_user_count.php new file mode 100644 index 000000000..64b4c3442 --- /dev/null +++ b/plugins/Sitemap/Sitemap_user_count.php @@ -0,0 +1,284 @@ +<?php +/** + * Data class for counting user registrations by date + * + * PHP version 5 + * + * @category Data + * @package StatusNet + * @author Evan Prodromou <evan@status.net> + * @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3 + * @link http://status.net/ + * + * StatusNet - the distributed open-source microblogging tool + * Copyright (C) 2010, StatusNet, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +require_once INSTALLDIR . '/classes/Memcached_DataObject.php'; + +/** + * Data class for counting users by date + * + * We make a separate sitemap for each user registered by date. + * To save ourselves some processing effort, we cache this data + * + * @category Action + * @package StatusNet + * @author Evan Prodromou <evan@status.net> + * @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3 + * @link http://status.net/ + * + * @see DB_DataObject + */ + +class Sitemap_user_count extends Memcached_DataObject +{ + public $__table = 'sitemap_user_count'; // table name + + public $registration_date; // date primary_key not_null + public $user_count; // int(4) + public $created; + public $modified; + + /** + * Get an instance by key + * + * This is a utility method to get a single instance with a given key value. + * + * @param string $k Key to use to lookup (usually 'user_id' for this class) + * @param mixed $v Value to lookup + * + * @return Sitemap_user_count object found, or null for no hits + * + */ + + function staticGet($k, $v=null) + { + return Memcached_DataObject::staticGet('Sitemap_user_count', $k, $v); + } + + /** + * return table definition for DB_DataObject + * + * DB_DataObject needs to know something about the table to manipulate + * instances. This method provides all the DB_DataObject needs to know. + * + * @return array array of column definitions + */ + + function table() + { + return array('registration_date' => DB_DATAOBJECT_DATE + DB_DATAOBJECT_NOTNULL, + 'user_count' => DB_DATAOBJECT_INT, + 'created' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL, + 'modified' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL); + } + + /** + * return key definitions for DB_DataObject + * + * DB_DataObject needs to know about keys that the table has; this function + * defines them. + * + * @return array key definitions + */ + + function keys() + { + return array('registration_date' => 'K'); + } + + function sequenceKey() + { + return array(false, false, false); + } + + /** + * return key definitions for Memcached_DataObject + * + * Our caching system uses the same key definitions, but uses a different + * method to get them. + * + * @return array key definitions + */ + + function keyTypes() + { + return $this->keys(); + } + + static function getAll() + { + $userCounts = self::cacheGet('sitemap:user:counts'); + + if ($userCounts === false) { + + $suc = new Sitemap_user_count(); + $suc->orderBy('registration_date DESC'); + + // Fetch the first one to check up-to-date-itude + + $n = $suc->find(true); + + $today = self::today(); + $userCounts = array(); + + if (!$n) { // No counts saved yet + $userCounts = self::initializeCounts(); + } else if ($suc->registration_date < $today) { // There are counts but not up to today + $userCounts = self::fillInCounts($suc->registration_date); + } else if ($suc->registration_date == $today) { // Refresh today's + $userCounts[$today] = self::updateToday(); + } + + // starts with second-to-last date + + while ($suc->fetch()) { + $userCounts[$suc->registration_date] = $suc->user_count; + } + + self::cacheSet('sitemap:user:counts', $userCounts); + } + + return $userCounts; + } + + static function initializeCounts() + { + $firstDate = self::getFirstDate(); // awww + $today = self::today(); + + $counts = array(); + + for ($d = $firstDate; $d <= $today; $d = self::incrementDay($d)) { + $n = self::getCount($d); + self::insertCount($d, $n); + $counts[$d] = $n; + } + + return $counts; + } + + static function fillInCounts($lastDate) + { + $today = self::today(); + + $counts = array(); + + $n = self::getCount($lastDate); + self::updateCount($lastDate, $n); + + $counts[$lastDate] = $n; + + for ($d = self::incrementDay($lastDate); $d <= $today; $d = self::incrementDay($d)) { + $n = self::getCount($d); + self::insertCount($d, $n); + } + + return $counts; + } + + static function updateToday() + { + $today = self::today(); + + $n = self::getCount($today); + self::updateCount($today, $n); + + return $n; + } + + static function getCount($d) + { + $user = new User(); + $user->whereAdd('created BETWEEN "'.$d.' 00:00:00" AND "'.self::incrementDay($d).' 00:00:00"'); + $n = $user->count(); + + return $n; + } + + static function insertCount($d, $n) + { + $suc = new Sitemap_user_count(); + + $suc->registration_date = DB_DataObject_Cast::date($d); + $suc->user_count = $n; + $suc->created = common_sql_now(); + $suc->modified = $suc->created; + + if (!$suc->insert()) { + common_log(LOG_WARNING, "Could not save user counts for '$d'"); + } + } + + static function updateCount($d, $n) + { + $suc = Sitemap_user_count::staticGet('registration_date', DB_DataObject_Cast::date($d)); + + if (empty($suc)) { + throw new Exception("No such registration date: $d"); + } + + $orig = clone($suc); + + $suc->registration_date = DB_DataObject_Cast::date($d); + $suc->user_count = $n; + $suc->created = common_sql_now(); + $suc->modified = $suc->created; + + if (!$suc->update($orig)) { + common_log(LOG_WARNING, "Could not save user counts for '$d'"); + } + } + + static function incrementDay($d) + { + $dt = self::dateStrToInt($d); + return self::dateIntToStr($dt + 24 * 60 * 60); + } + + static function dateStrToInt($d) + { + return strtotime($d.' 00:00:00'); + } + + static function dateIntToStr($dt) + { + return date('Y-m-d', $dt); + } + + static function getFirstDate() + { + $u = new User(); + $u->selectAdd(); + $u->selectAdd('date(min(created)) as first_date'); + if ($u->find(true)) { + return $u->first_date; + } else { + // Is this right? + return self::dateIntToStr(time()); + } + } + + static function today() + { + return self::dateIntToStr(time()); + } +} diff --git a/plugins/Sitemap/noticesitemap.php b/plugins/Sitemap/noticesitemap.php new file mode 100644 index 000000000..9f323f72a --- /dev/null +++ b/plugins/Sitemap/noticesitemap.php @@ -0,0 +1,137 @@ +<?php +/** + * StatusNet, the distributed open-source microblogging tool + * + * Show list of user pages + * + * PHP version 5 + * + * LICENCE: This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou <evan@status.net> + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +/** + * sitemap for users + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou <evan@status.net> + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +class NoticesitemapAction extends SitemapAction +{ + var $notices = null; + var $j = 0; + + function prepare($args) + { + parent::prepare($args); + + $y = $this->trimmed('year'); + + $m = $this->trimmed('month'); + $d = $this->trimmed('day'); + + $i = $this->trimmed('index'); + + $y += 0; + $m += 0; + $d += 0; + $i += 0; + + $this->notices = $this->getNotices($y, $m, $d, $i); + $this->j = 0; + + return true; + } + + function nextUrl() + { + if ($this->j < count($this->notices)) { + $n = $this->notices[$this->j]; + $this->j++; + return array(common_local_url('shownotice', array('notice' => $n[0])), + common_date_w3dtf($n[1]), + 'never', + null); + } else { + return null; + } + } + + function getNotices($y, $m, $d, $i) + { + $n = Notice::cacheGet("sitemap:notice:$y:$m:$d:$i"); + + if ($n === false) { + + $notice = new Notice(); + + $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); + + // XXX: estimates 1d == 24h, which screws up days + // with leap seconds (1d == 24h + 1s). Thankfully they're + // few and far between. + + $theend = strtotime($begindt) + (24 * 60 * 60); + $enddt = common_sql_date($theend); + + $notice->selectAdd(); + $notice->selectAdd('id, created'); + + $notice->whereAdd("created >= '$begindt'"); + $notice->whereAdd("created < '$enddt'"); + + $notice->whereAdd('is_local != 0'); + + $notice->orderBy('created'); + + $offset = ($i-1) * SitemapPlugin::NOTICES_PER_MAP; + $limit = SitemapPlugin::NOTICES_PER_MAP; + + $notice->limit($offset, $limit); + + $notice->find(); + + $n = array(); + + while ($notice->fetch()) { + $n[] = array($notice->id, $notice->created); + } + + $c = Cache::instance(); + + if (!empty($c)) { + $c->set(Cache::key("sitemap:notice:$y:$m:$d:$i"), + $n, + Cache::COMPRESSED, + ((time() > $theend) ? (time() + 90 * 24 * 60 * 60) : (time() + 5 * 60))); + } + } + + return $n; + } +} diff --git a/plugins/Sitemap/sitemapaction.php b/plugins/Sitemap/sitemapaction.php new file mode 100644 index 000000000..45edfccc5 --- /dev/null +++ b/plugins/Sitemap/sitemapaction.php @@ -0,0 +1,95 @@ +<?php +/** + * StatusNet, the distributed open-source microblogging tool + * + * Superclass for sitemap-generating actions + * + * PHP version 5 + * + * LICENCE: This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou <evan@status.net> + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +/** + * superclass for sitemap actions + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou <evan@status.net> + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +class SitemapAction extends Action +{ + /** + * handle the action + * + * @param array $args unused. + * + * @return void + */ + + function handle($args) + { + header('Content-Type: text/xml; charset=UTF-8'); + $this->startXML(); + + $this->elementStart('urlset', array('xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9')); + + while (list($url, $lm, $cf, $p) = $this->nextUrl()) { + $this->showUrl($url, $lm, $cf, $p); + } + + $this->elementEnd('urlset'); + + $this->endXML(); + } + + function showUrl($url, $lastMod=null, $changeFreq=null, $priority=null) + { + $this->elementStart('url'); + $this->element('loc', null, $url); + if (!is_null($lastMod)) { + $this->element('lastmod', null, $lastMod); + } + if (!is_null($changeFreq)) { + $this->element('changefreq', null, $changeFreq); + } + if (!is_null($priority)) { + $this->element('priority', null, $priority); + } + $this->elementEnd('url'); + } + + function nextUrl() + { + return null; + } + + function isReadOnly() + { + return true; + } +} diff --git a/plugins/Sitemap/sitemapindex.php b/plugins/Sitemap/sitemapindex.php new file mode 100644 index 000000000..169e3031c --- /dev/null +++ b/plugins/Sitemap/sitemapindex.php @@ -0,0 +1,128 @@ +<?php +/** + * StatusNet, the distributed open-source microblogging tool + * + * Generate sitemap index + * + * PHP version 5 + * + * LICENCE: This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou <evan@status.net> + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +/** + * Show the sitemap index + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou <evan@status.net> + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +class SitemapindexAction extends Action +{ + /** + * handle the action + * + * @param array $args unused. + * + * @return void + */ + + function handle($args) + { + header('Content-Type: text/xml; charset=UTF-8'); + $this->startXML(); + + $this->elementStart('sitemapindex', array('xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9')); + + $this->showNoticeSitemaps(); + $this->showUserSitemaps(); + + $this->elementEnd('sitemapindex'); + + $this->endXML(); + } + + function showUserSitemaps() + { + $userCounts = Sitemap_user_count::getAll(); + + foreach ($userCounts as $dt => $cnt) { + $cnt = $cnt+0; + + if ($cnt == 0) { + continue; + } + + $n = (int)$cnt / (int)SitemapPlugin::USERS_PER_MAP; + if (($cnt % SitemapPlugin::USERS_PER_MAP) != 0) { + $n++; + } + for ($i = 1; $i <= $n; $i++) { + $this->showSitemap('user', $dt, $i); + } + } + } + + function showNoticeSitemaps() + { + $noticeCounts = Sitemap_notice_count::getAll(); + + foreach ($noticeCounts as $dt => $cnt) { + if ($cnt == 0) { + continue; + } + $n = $cnt / SitemapPlugin::NOTICES_PER_MAP; + if ($cnt % SitemapPlugin::NOTICES_PER_MAP) { + $n++; + } + for ($i = 1; $i <= $n; $i++) { + $this->showSitemap('notice', $dt, $i); + } + } + } + + function showSitemap($prefix, $dt, $i) + { + list($y, $m, $d) = explode('-', $dt); + + $this->elementStart('sitemap'); + $this->element('loc', null, common_local_url($prefix.'sitemap', + array('year' => $y, + 'month' => $m, + 'day' => $d, + 'index' => $i))); + + $begdate = strtotime("$y-$m-$d 00:00:00"); + $enddate = $begdate + (24 * 60 * 60); + + if ($enddate < time()) { + $this->element('lastmod', null, date(DATE_W3C, $enddate)); + } + + $this->elementEnd('sitemap'); + } +} diff --git a/plugins/Sitemap/usersitemap.php b/plugins/Sitemap/usersitemap.php new file mode 100644 index 000000000..de1200715 --- /dev/null +++ b/plugins/Sitemap/usersitemap.php @@ -0,0 +1,128 @@ +<?php +/** + * StatusNet, the distributed open-source microblogging tool + * + * Show list of user pages + * + * PHP version 5 + * + * LICENCE: This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou <evan@status.net> + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +/** + * sitemap for users + * + * @category Sitemap + * @package StatusNet + * @author Evan Prodromou <evan@status.net> + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0 + * @link http://status.net/ + */ + +class UsersitemapAction extends SitemapAction +{ + var $users = null; + var $j = 0; + + function prepare($args) + { + parent::prepare($args); + + $y = $this->trimmed('year'); + + $m = $this->trimmed('month'); + $d = $this->trimmed('day'); + + $i = $this->trimmed('index'); + + $y += 0; + $m += 0; + $d += 0; + $i += 0; + + $this->users = $this->getUsers($y, $m, $d, $i); + $this->j = 0; + return true; + } + + function nextUrl() + { + if ($this->j < count($this->users)) { + $nickname = $this->users[$this->j]; + $this->j++; + return array(common_profile_url($nickname), null, null, '1.0'); + } else { + return null; + } + } + + function getUsers($y, $m, $d, $i) + { + $u = User::cacheGet("sitemap:user:$y:$m:$d:$i"); + + if ($u === false) { + + $user = new User(); + + $begindt = sprintf('%04d-%02d-%02d 00:00:00', $y, $m, $d); + + // XXX: estimates 1d == 24h, which screws up days + // with leap seconds (1d == 24h + 1s). Thankfully they're + // few and far between. + + $theend = strtotime($begindt) + (24 * 60 * 60); + $enddt = common_sql_date($theend); + + $user->selectAdd(); + $user->selectAdd('nickname'); + $user->whereAdd("created >= '$begindt'"); + $user->whereAdd("created < '$enddt'"); + + $user->orderBy('created'); + + $offset = ($i-1) * SitemapPlugin::USERS_PER_MAP; + $limit = SitemapPlugin::USERS_PER_MAP; + + $user->limit($offset, $limit); + + $user->find(); + + while ($user->fetch()) { + $u[] = $user->nickname; + } + + $c = Cache::instance(); + + if (!empty($c)) { + $c->set(Cache::key("sitemap:user:$y:$m:$d:$i"), + $u, + Cache::COMPRESSED, + ((time() > $theend) ? (time() + 90 * 24 * 60 * 60) : (time() + 5 * 60))); + } + } + + return $u; + } +} |