From 74a03cbe1fecda9764f826c088331cc4ffbb9433 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Thu, 28 Jan 2010 14:27:35 -0500 Subject: always set up database_rw, regardless, so cached sessions work --- index.php | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'index.php') diff --git a/index.php b/index.php index b5edc0f94..5520d690b 100644 --- a/index.php +++ b/index.php @@ -152,6 +152,16 @@ function checkMirror($action_obj, $args) static $alwaysRW = array('session', 'remember_me'); + // We ensure that these tables always are used + // on the master DB + + $config['db']['database_rw'] = $config['db']['database']; + $config['db']['ini_rw'] = INSTALLDIR.'/classes/statusnet.ini'; + + foreach ($alwaysRW as $table) { + $config['db']['table_'.$table] = 'rw'; + } + if (common_config('db', 'mirror') && $action_obj->isReadOnly($args)) { if (is_array(common_config('db', 'mirror'))) { // "load balancing", ha ha @@ -162,16 +172,6 @@ function checkMirror($action_obj, $args) $mirror = common_config('db', 'mirror'); } - // We ensure that these tables always are used - // on the master DB - - $config['db']['database_rw'] = $config['db']['database']; - $config['db']['ini_rw'] = INSTALLDIR.'/classes/statusnet.ini'; - - foreach ($alwaysRW as $table) { - $config['db']['table_'.$table] = 'rw'; - } - // everyone else uses the mirror $config['db']['database'] = $mirror; -- cgit v1.2.3-54-g00ecf From fa7895333724e314e2b32cb89f19a41069c554be Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Thu, 28 Jan 2010 16:35:38 -0500 Subject: move RW setup above user get in index.php so remember_me works --- index.php | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'index.php') diff --git a/index.php b/index.php index 5520d690b..5aa40440a 100644 --- a/index.php +++ b/index.php @@ -146,7 +146,7 @@ function formatBacktraceLine($n, $line) return $out; } -function checkMirror($action_obj, $args) +function setupRW() { global 
$config; @@ -161,7 +161,10 @@ function checkMirror($action_obj, $args) foreach ($alwaysRW as $table) { $config['db']['table_'.$table] = 'rw'; } +} +function checkMirror($action_obj, $args) +{ if (common_config('db', 'mirror') && $action_obj->isReadOnly($args)) { if (is_array(common_config('db', 'mirror'))) { // "load balancing", ha ha @@ -237,9 +240,13 @@ function main() PEAR::setErrorHandling(PEAR_ERROR_CALLBACK, 'handleError'); + // Make sure RW database is setup + + setupRW(); + // XXX: we need a little more structure in this script - // get and cache current user + // get and cache current user (may hit RW!) $user = common_current_user(); -- cgit v1.2.3-54-g00ecf From be7bca2303cc9900f2c1a746a10a785d9d95783c Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Thu, 28 Jan 2010 16:50:28 -0500 Subject: Revert "move RW setup above user get in index.php so remember_me works" This reverts commit fa7895333724e314e2b32cb89f19a41069c554be. --- index.php | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'index.php') diff --git a/index.php b/index.php index 5aa40440a..5520d690b 100644 --- a/index.php +++ b/index.php @@ -146,7 +146,7 @@ function formatBacktraceLine($n, $line) return $out; } -function setupRW() +function checkMirror($action_obj, $args) { global $config; @@ -161,10 +161,7 @@ function setupRW() foreach ($alwaysRW as $table) { $config['db']['table_'.$table] = 'rw'; } -} -function checkMirror($action_obj, $args) -{ if (common_config('db', 'mirror') && $action_obj->isReadOnly($args)) { if (is_array(common_config('db', 'mirror'))) { // "load balancing", ha ha @@ -240,13 +237,9 @@ function main() PEAR::setErrorHandling(PEAR_ERROR_CALLBACK, 'handleError'); - // Make sure RW database is setup - - setupRW(); - // XXX: we need a little more structure in this script - // get and cache current user (may hit RW!) 
+ // get and cache current user $user = common_current_user(); -- cgit v1.2.3-54-g00ecf From a33194effb350a03dcdf1c0683fb15d575d245e5 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Thu, 28 Jan 2010 16:52:05 -0500 Subject: Revert "Revert "move RW setup above user get in index.php so remember_me works"" This reverts commit be7bca2303cc9900f2c1a746a10a785d9d95783c. --- index.php | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'index.php') diff --git a/index.php b/index.php index 5520d690b..5aa40440a 100644 --- a/index.php +++ b/index.php @@ -146,7 +146,7 @@ function formatBacktraceLine($n, $line) return $out; } -function checkMirror($action_obj, $args) +function setupRW() { global $config; @@ -161,7 +161,10 @@ function checkMirror($action_obj, $args) foreach ($alwaysRW as $table) { $config['db']['table_'.$table] = 'rw'; } +} +function checkMirror($action_obj, $args) +{ if (common_config('db', 'mirror') && $action_obj->isReadOnly($args)) { if (is_array(common_config('db', 'mirror'))) { // "load balancing", ha ha @@ -237,9 +240,13 @@ function main() PEAR::setErrorHandling(PEAR_ERROR_CALLBACK, 'handleError'); + // Make sure RW database is setup + + setupRW(); + // XXX: we need a little more structure in this script - // get and cache current user + // get and cache current user (may hit RW!) 
$user = common_current_user(); -- cgit v1.2.3-54-g00ecf From 63a0e84a8b94d84b106431b648ec76e2537ab9c6 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Thu, 28 Jan 2010 16:52:42 -0500 Subject: lost config in index.php made all traffic go to master --- index.php | 2 ++ 1 file changed, 2 insertions(+) (limited to 'index.php') diff --git a/index.php b/index.php index 5aa40440a..605b380bf 100644 --- a/index.php +++ b/index.php @@ -165,6 +165,8 @@ function setupRW() function checkMirror($action_obj, $args) { + global $config; + if (common_config('db', 'mirror') && $action_obj->isReadOnly($args)) { if (is_array(common_config('db', 'mirror'))) { // "load balancing", ha ha -- cgit v1.2.3-54-g00ecf From dc62246443e3584ef5267505275f618f6fa86bf7 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sun, 31 Jan 2010 10:12:26 -0500 Subject: Add a robots.txt URL to the site root Adds a robots.txt file to the site root. Defaults defined by 'robotstxt' section of config. New events StartRobotsTxt and EndRobotsTxt to let plugins add information. Probably not useful if path is not /, but won't hurt anything, either. --- EVENTS.txt | 6 +++ README | 14 +++++++ actions/robotstxt.php | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++ index.php | 5 ++- lib/default.php | 4 ++ lib/router.php | 2 + 6 files changed, 129 insertions(+), 2 deletions(-) create mode 100644 actions/robotstxt.php (limited to 'index.php') diff --git a/EVENTS.txt b/EVENTS.txt index 3317c80de..6bf12bf13 100644 --- a/EVENTS.txt +++ b/EVENTS.txt @@ -708,3 +708,9 @@ EndUserRegister: When a new user has been registered - &$profile: new profile data - &$user: new user account +StartRobotsTxt: Before outputting the robots.txt page +- &$action: RobotstxtAction being shown + +EndRobotsTxt: After the default robots.txt page (good place for customization) +- &$action: RobotstxtAction being shown + diff --git a/README b/README index da278f741..4e576dcdd 100644 --- a/README +++ b/README @@ -1496,6 +1496,20 @@ interface. 
It also makes the user's profile the root URL. enabled: Whether to run in "single user mode". Default false. nickname: nickname of the single user. +robotstxt +--------- + +We put out a default robots.txt file to guide the processing of +Web crawlers. See http://www.robotstxt.org/ for more information +on the format of this file. + +crawldelay: if non-empty, this value is provided as the Crawl-Delay: + for the robots.txt file. see http://ur1.ca/l5a0 + for more information. Default is zero, no explicit delay. +disallow: Array of (virtual) directories to disallow. Default is 'main', + 'search', 'message', 'settings', 'admin'. Ignored when site + is private, in which case the entire site ('/') is disallowed. + Plugins ======= diff --git a/actions/robotstxt.php b/actions/robotstxt.php new file mode 100644 index 000000000..5131097c8 --- /dev/null +++ b/actions/robotstxt.php @@ -0,0 +1,100 @@ + + * @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3 + * @link http://status.net/ + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +if (!defined('STATUSNET')) { + exit(1); +} + +/** + * Prints out a static robots.txt + * + * @category Action + * @package StatusNet + * @author Evan Prodromou + * @license http://www.fsf.org/licensing/licenses/agpl.html AGPLv3 + * @link http://status.net/ + */ + +class RobotstxtAction extends Action +{ + /** + * Handles requests + * + * Since this is a relatively static document, we + * don't do a prepare() + * + * @param array $args GET, POST, and URL params; unused. + * + * @return void + */ + + function handle($args) + { + if (Event::handle('StartRobotsTxt', array($this))) { + + header('Content-Type: text/plain'); + + print "User-Agent: *\n"; + + if (common_config('site', 'private')) { + + print "Disallow: /\n"; + + } else { + + $disallow = common_config('robotstxt', 'disallow'); + + foreach ($disallow as $dir) { + print "Disallow: /$dir/\n"; + } + + $crawldelay = common_config('robotstxt', 'crawldelay'); + + if (!empty($crawldelay)) { + print "Crawl-delay: " . $crawldelay . "\n"; + } + } + + Event::handle('EndRobotsTxt', array($this)); + } + } + + /** + * Return true; this page doesn't touch the DB. + * + * @param array $args other arguments + * + * @return boolean is read only action? 
+ */ + + function isReadOnly($args) + { + return true; + } +} diff --git a/index.php b/index.php index 605b380bf..06ff9900f 100644 --- a/index.php +++ b/index.php @@ -285,8 +285,9 @@ function main() if (!$user && common_config('site', 'private') && !isLoginAction($action) && !preg_match('/rss$/', $action) - && !preg_match('/^Api/', $action) - ) { + && $action != 'robotstxt' + && !preg_match('/^Api/', $action)) { + // set returnto $rargs =& common_copy_args($args); unset($rargs['action']); diff --git a/lib/default.php b/lib/default.php index 1337a9633..2bedc4bf0 100644 --- a/lib/default.php +++ b/lib/default.php @@ -270,4 +270,8 @@ $default = 'singleuser' => array('enabled' => false, 'nickname' => null), + 'robotstxt' => + array('crawldelay' => 0, + 'disallow' => array('main', 'settings', 'admin', 'search', 'message') + ), ); diff --git a/lib/router.php b/lib/router.php index ca9f32812..4b5b8d0bb 100644 --- a/lib/router.php +++ b/lib/router.php @@ -73,6 +73,8 @@ class Router if (Event::handle('StartInitializeRouter', array(&$m))) { + $m->connect('robots.txt', array('action' => 'robotstxt')); + $m->connect('opensearch/people', array('action' => 'opensearch', 'type' => 'people')); $m->connect('opensearch/notice', array('action' => 'opensearch', -- cgit v1.2.3-54-g00ecf