summaryrefslogtreecommitdiff
path: root/plugins/SphinxSearch
diff options
context:
space:
mode:
authorZach Copley <zach@status.net>2009-11-19 20:12:46 -0800
committerZach Copley <zach@status.net>2009-11-19 20:12:46 -0800
commit4b98edf75f4e255f8c61087bd1525d89653a521f (patch)
treeb2a7eb6d77429eadb1beabe2d5e6ae1c1a2831d6 /plugins/SphinxSearch
parentf92574dbcb1f2d7cd0aaf3c9362db46fa066e888 (diff)
parentc213477081afefb1720c8ae729d1965e7a1dac63 (diff)
Merge branch '0.9-release'
* 0.9-release: (874 commits) Removed call to NewDirectMessage() until IE return is fixed i.e., Don't show flag user button your own profile Fixed HXR response for flag user Using the right form class name Using common_redirect Left a form_data class of a <ul> in the user admin panel Added validation to fields in user admin panel Added a user admin panel Added mobile logos for default and identica themes Changed gif to png Changed this to action. THANKS zach! Doing content negotiation only once Add execute bit to pingqueuehandler Localisation updates for !StatusNet from !translatewiki.net Use the browser's geolocation API to set the location on the notice form Add geometa library, and include it. Add location form elements to the noticeform, and save their values on submission Use the $user object nickname, as login name doesnt have to == nickname anymore with plugins such as ldap/etc Revert "Re added NICKNAME_FMT constant to router.php." Moved most path and server settings to a new paths admin panel ... Conflicts: js/util.js locale/it_IT/LC_MESSAGES/statusnet.mo locale/mk_MK/LC_MESSAGES/statusnet.mo locale/mk_MK/LC_MESSAGES/statusnet.po locale/pt_BR/LC_MESSAGES/statusnet.mo locale/vi_VN/LC_MESSAGES/statusnet.mo plugins/InfiniteScroll/infinitescroll.js plugins/Realtime/realtimeupdate.js
Diffstat (limited to 'plugins/SphinxSearch')
-rw-r--r--plugins/SphinxSearch/README45
-rw-r--r--plugins/SphinxSearch/SphinxSearchPlugin.php100
-rwxr-xr-xplugins/SphinxSearch/scripts/gen_config.php126
-rwxr-xr-xplugins/SphinxSearch/scripts/index_update.php61
-rw-r--r--plugins/SphinxSearch/scripts/sphinx-utils.php63
-rwxr-xr-xplugins/SphinxSearch/scripts/sphinx.sh15
-rw-r--r--plugins/SphinxSearch/sphinx.conf.sample71
-rw-r--r--plugins/SphinxSearch/sphinxsearch.php96
8 files changed, 577 insertions, 0 deletions
diff --git a/plugins/SphinxSearch/README b/plugins/SphinxSearch/README
new file mode 100644
index 000000000..5a2c063bd
--- /dev/null
+++ b/plugins/SphinxSearch/README
@@ -0,0 +1,45 @@
+You can get a significant boost in performance using Sphinx Search
+instead of your database server to search for users and notices.
+<http://sphinxsearch.com/>.
+
+Configuration
+-------------
+
+In StatusNet's configuration, you can adjust the following settings
+under 'sphinx':
+
+enabled: Set to false to disable. Default true when the plugin is loaded.
+server: a string with the hostname of the sphinx server.
+port: an integer with the port number of the sphinx server.
+
+
+Requirements
+------------
+
+To use a Sphinx server to search users and notices, you also need
+to install, compile and enable the sphinx pecl extension for php on the
+client side, which itself depends on the sphinx development files.
+"pecl install sphinx" should take care of that. Add "extension=sphinx.so"
+to your php.ini and reload apache to enable it.
+
+You can update your MySQL or Postgresql databases to drop their fulltext
+search indexes, since they're now provided by sphinx.
+
+
+You will also need a Sphinx server to serve the search queries.
+
+On the sphinx server side, a script reads the main database and builds
+the keyword index. A cron job reads the database and keeps the sphinx
+indexes up to date. scripts/sphinx-cron.sh should be called by cron
+every 5 minutes, for example. scripts/sphinx.sh is an init.d script
+to start and stop the sphinx search daemon.
+
+
+Server configuration
+--------------------
+scripts/gen_config.php can generate a sphinx.conf file listing MySQL
+data sources for your databases. You may need to tweak paths afterwards.
+
+ $ plugins/SphinxSearch/scripts/gen_config.php > sphinx.conf
+
+If you wish, you can build a full config yourself based on sphinx.conf.sample
diff --git a/plugins/SphinxSearch/SphinxSearchPlugin.php b/plugins/SphinxSearch/SphinxSearchPlugin.php
new file mode 100644
index 000000000..7a27a4c04
--- /dev/null
+++ b/plugins/SphinxSearch/SphinxSearchPlugin.php
@@ -0,0 +1,100 @@
+<?php
+/**
+ * StatusNet, the distributed open-source microblogging tool
+ *
+ * PHP version 5
+ *
+ * LICENCE: This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * @category Plugin
+ * @package StatusNet
+ * @author Brion Vibber <brion@status.net>
+ * @copyright 2009 Control Yourself, Inc.
+ * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
+ * @link http://laconi.ca/
+ */
+
+// Stop immediately unless the STATUSNET constant has been defined.
+if (!defined('STATUSNET')) {
+ exit(1);
+}
+
+// Set defaults if not already set in the config array...
+// Only keys for which isset($config['sphinx'][$key]) is false are filled in,
+// so values already provided by the site configuration are left untouched.
+global $config;
+$sphinxDefaults =
+ array('enabled' => true,
+ 'server' => 'localhost',
+ 'port' => 3312);
+foreach($sphinxDefaults as $key => $val) {
+ if (!isset($config['sphinx'][$key])) {
+ $config['sphinx'][$key] = $val;
+ }
+}
+
+
+
+/**
+ * Hooks the Sphinx search backend into StatusNet.
+ *
+ * @category Plugin
+ * @package  StatusNet
+ * @author   Brion Vibber <brion@status.net>
+ * @license  http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
+ * @link     http://laconi.ca/
+ * @link     http://twitter.com/
+ */
+
+class SphinxSearchPlugin extends Plugin
+{
+    /**
+     * Autoload hook: includes the SphinxSearch engine class on demand.
+     *
+     * @param string $cls name of the class being looked up
+     *
+     * @return boolean false once the SphinxSearch file has been included,
+     *                 true for every other class name
+     */
+    function onAutoload($cls)
+    {
+        // Loose comparison on purpose: it mirrors switch/case matching.
+        if ($cls != 'SphinxSearch') {
+            return true;
+        }
+
+        $file = strtolower($cls) . '.php';
+        include_once INSTALLDIR . '/plugins/SphinxSearch/' . $file;
+        return false;
+    }
+
+    /**
+     * Search-engine hook: hands back a SphinxSearch engine for the table
+     * when sphinx is enabled and the server is reachable.
+     *
+     * @param Memcached_DataObject $target         object the engine is built around
+     * @param string               $table          table name handed to the engine
+     * @param SearchEngine         &$search_engine receives the engine on success
+     *
+     * @return boolean false when $search_engine was set, true when sphinx
+     *                 is disabled or not connected
+     *
+     * @throws ServerException when the SphinxClient class is not available
+     */
+    function onGetSearchEngine(Memcached_DataObject $target, $table, &$search_engine)
+    {
+        if (!common_config('sphinx', 'enabled')) {
+            // Sphinx disabled
+            return true;
+        }
+
+        if (!class_exists('SphinxClient')) {
+            throw new ServerException('Sphinx PHP extension must be installed.');
+        }
+
+        $candidate = new SphinxSearch($target, $table);
+        if (!$candidate->is_connected()) {
+            // Sphinx disconnected
+            return true;
+        }
+
+        $search_engine = $candidate;
+        return false;
+    }
+}
diff --git a/plugins/SphinxSearch/scripts/gen_config.php b/plugins/SphinxSearch/scripts/gen_config.php
new file mode 100755
index 000000000..d5a00b6b6
--- /dev/null
+++ b/plugins/SphinxSearch/scripts/gen_config.php
@@ -0,0 +1,126 @@
+#!/usr/bin/env php
+<?php
+/*
+ * StatusNet - the distributed open-source microblogging tool
+ * Copyright (C) 2009, StatusNet, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// Root of the StatusNet install: three directory levels above this script.
+define('INSTALLDIR', realpath(dirname(__FILE__) . '/../../..'));
+
+// Script-specific long options; defined before commandline.inc is loaded
+// (presumably consumed there -- confirm against scripts/commandline.inc).
+$longoptions = array('base=', 'network');
+
+// The documented --base default matches what sphinx_base() in
+// sphinx-utils.php actually returns when --base is not given.
+$helptext = <<<END_OF_TRIM_HELP
+Generates sphinx.conf file based on StatusNet configuration.
+ --base Base dir to Sphinx install
+ (default /usr/local/sphinx)
+ --network Use status_network global config table
+ (non-functional at present)
+
+
+END_OF_TRIM_HELP;
+
+require_once INSTALLDIR . '/scripts/commandline.inc';
+require dirname(__FILE__) . '/sphinx-utils.php';
+
+
+// Resolve the Sphinx base directory at file scope: the searchd section
+// below interpolates {$base}, which was previously undefined here and
+// produced paths such as "/log/searchd.log".
+$base = sphinx_base();
+
+$timestamp = date('r');
+print <<<END
+#
+# Sphinx configuration for StatusNet
+# Generated {$timestamp}
+#
+
+END;
+
+// Emits one source/index pair per table for every configured site.
+sphinx_iterate_sites('sphinx_site_template');
+
+print <<<END
+
+indexer
+{
+ mem_limit = 300M
+}
+
+searchd
+{
+ port = 3312
+ log = {$base}/log/searchd.log
+ query_log = {$base}/log/query.log
+ read_timeout = 5
+ max_children = 30
+ pid_file = {$base}/log/searchd.pid
+ max_matches = 1000
+ seamless_rotate = 1
+ preopen_indexes = 0
+ unlink_old = 1
+}
+
+END;
+
+
+
+/**
+ * Emit the Sphinx source/index sections for one site.
+ *
+ * Two sections are produced via sphinx_template(), in this order:
+ * 'profile' first, then 'notice'.
+ *
+ * @fixme we only seem to have master DB currently available...
+ *
+ * @param object $sn site descriptor, passed straight through to sphinx_template()
+ *
+ * @return string concatenation of the two sphinx_template() return values
+ */
+function sphinx_site_template($sn)
+{
+    $profileSection = sphinx_template(
+        $sn,
+        'profile',
+        'SELECT id, UNIX_TIMESTAMP(created) as created_ts, nickname, fullname, location, bio, homepage FROM profile',
+        'SELECT * FROM profile where id = $id'
+    );
+
+    $noticeSection = sphinx_template(
+        $sn,
+        'notice',
+        'SELECT id, UNIX_TIMESTAMP(created) as created_ts, content FROM notice',
+        'SELECT * FROM notice where notice.id = $id AND notice.is_local != -2'
+    );
+
+    return $profileSection . $noticeSection;
+}
+
+/**
+ * Print one Sphinx "source" section and its matching "index" section.
+ *
+ * The output is written with print; the function has no return statement.
+ * Section names are derived from the site's database name and the table:
+ * source "<dbname>_src_<table>" and index "<dbname>_<table>".
+ *
+ * @param object $sn         site descriptor; the sitename, dbname, dbhost,
+ *                           dbuser and dbpass properties are read
+ * @param string $table      table name used in the section names and index path
+ * @param string $query      value written as sql_query
+ * @param string $query_info value written as sql_query_info
+ */
+function sphinx_template($sn, $table, $query, $query_info)
+{
+ // Index files are placed under <base>/data/.
+ $base = sphinx_base();
+ // The configured database type is written verbatim as the source "type".
+ $dbtype = common_config('db', 'type');
+
+ print <<<END
+
+#
+# {$sn->sitename}
+#
+source {$sn->dbname}_src_{$table}
+{
+ type = {$dbtype}
+ sql_host = {$sn->dbhost}
+ sql_user = {$sn->dbuser}
+ sql_pass = {$sn->dbpass}
+ sql_db = {$sn->dbname}
+ sql_query_pre = SET NAMES utf8;
+ sql_query = {$query}
+ sql_query_info = {$query_info}
+ sql_attr_timestamp = created_ts
+}
+
+index {$sn->dbname}_{$table}
+{
+ source = {$sn->dbname}_src_{$table}
+ path = {$base}/data/{$sn->dbname}_{$table}
+ docinfo = extern
+ charset_type = utf-8
+ min_word_len = 3
+}
+
+
+END;
+}
diff --git a/plugins/SphinxSearch/scripts/index_update.php b/plugins/SphinxSearch/scripts/index_update.php
new file mode 100755
index 000000000..23c60ced7
--- /dev/null
+++ b/plugins/SphinxSearch/scripts/index_update.php
@@ -0,0 +1,61 @@
+#!/usr/bin/env php
+<?php
+/*
+ * StatusNet - the distributed open-source microblogging tool
+ * Copyright (C) 2009, StatusNet, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// Root of the StatusNet install: three directory levels above this script.
+define('INSTALLDIR', realpath(dirname(__FILE__) . '/../../..'));
+
+// 'rotate' is declared here because the help text documents --rotate and
+// sphinx_index_update() checks have_option('rotate'); it was missing from
+// the list (presumably consumed by commandline.inc -- confirm there).
+$longoptions = array('base=', 'network', 'rotate');
+
+// The documented --base default matches what sphinx_base() in
+// sphinx-utils.php actually returns when --base is not given.
+$helptext = <<<END_OF_TRIM_HELP
+Runs Sphinx search indexer.
+ --rotate Have Sphinx run index update in background and
+ rotate updated indexes into place as they finish.
+ --base Base dir to Sphinx install
+ (default /usr/local/sphinx)
+ --network Use status_network global config table for site list
+ (non-functional at present)
+
+
+END_OF_TRIM_HELP;
+
+require_once INSTALLDIR . '/scripts/commandline.inc';
+require dirname(__FILE__) . '/sphinx-utils.php';
+
+// Runs the indexer once for every configured site.
+sphinx_iterate_sites('sphinx_index_update');
+
+/**
+ * Run the Sphinx indexer for one site's 'notice' and 'profile' indexes.
+ *
+ * The command line is printed before it is executed with system().
+ * The binary path, config path and index names are passed through
+ * escapeshellarg() so a base directory or database name containing
+ * spaces or shell metacharacters cannot break or alter the command.
+ *
+ * @param object $sn site descriptor; only the dbname property is read
+ */
+function sphinx_index_update($sn)
+{
+    $base = sphinx_base();
+
+    $params = array();
+    if (have_option('rotate')) {
+        $params[] = '--rotate';
+    }
+    // Index names follow the "<dbname>_<table>" convention.
+    foreach (array('notice', 'profile') as $index) {
+        $params[] = escapeshellarg("{$sn->dbname}_{$index}");
+    }
+
+    $cmd = escapeshellarg("$base/bin/indexer") .
+        ' --config ' . escapeshellarg("$base/etc/sphinx.conf") .
+        ' ' . implode(' ', $params);
+
+    print "$cmd\n";
+    system($cmd);
+}
diff --git a/plugins/SphinxSearch/scripts/sphinx-utils.php b/plugins/SphinxSearch/scripts/sphinx-utils.php
new file mode 100644
index 000000000..7bbc25270
--- /dev/null
+++ b/plugins/SphinxSearch/scripts/sphinx-utils.php
@@ -0,0 +1,63 @@
+<?php
+/*
+ * StatusNet - the distributed open-source microblogging tool
+ * Copyright (C) 2009, StatusNet, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * Report whether the 'network' command-line option was given.
+ *
+ * @return mixed whatever have_option('network') returns
+ */
+function sphinx_use_network()
+{
+    $networkRequested = have_option('network');
+    return $networkRequested;
+}
+
+/**
+ * Base directory of the Sphinx install.
+ *
+ * @return string value of the 'base' option when given, otherwise "/usr/local/sphinx"
+ */
+function sphinx_base()
+{
+    return have_option('base')
+        ? get_option_value('base')
+        : "/usr/local/sphinx";
+}
+
+/**
+ * Invoke a callback once per site.
+ *
+ * Without the 'network' option, a single site descriptor is built from the
+ * 'db'/'database' connection string and the site name. With it, every row
+ * found in the status_network table is passed to the callback.
+ *
+ * @param callable $callback called with one site object per site
+ */
+function sphinx_iterate_sites($callback)
+{
+    if (!sphinx_use_network()) {
+        $dsn = common_config('db', 'database');
+        $parts = array();
+        if (!preg_match('!^(mysqli?|pgsql)://(.*?):(.*?)@(.*?)/(.*?)$!', $dsn, $parts)) {
+            print "Unrecognized database configuration string in config.php\n";
+            exit(1);
+        }
+
+        // $parts: 1 = driver (unused), 2 = user, 3 = password, 4 = host, 5 = database
+        $site = new stdClass();
+        $site->sitename = common_config('site', 'name');
+        $site->dbhost = $parts[4];
+        $site->dbuser = $parts[2];
+        $site->dbpass = $parts[3];
+        $site->dbname = $parts[5];
+
+        $callback($site);
+        return;
+    }
+
+    // @fixme this should use, like, some kind of config
+    Status_network::setupDB('localhost', 'statusnet', 'statuspass', 'statusnet');
+    $sites = new Status_network();
+    if (!$sites->find()) {
+        die("Confused... no sites in status_network table or lookup failed.\n");
+    }
+    while ($sites->fetch()) {
+        $callback($sites);
+    }
+}
+
diff --git a/plugins/SphinxSearch/scripts/sphinx.sh b/plugins/SphinxSearch/scripts/sphinx.sh
new file mode 100755
index 000000000..b8edeb302
--- /dev/null
+++ b/plugins/SphinxSearch/scripts/sphinx.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+if [[ $1 = "start" ]]
+then
+ echo "Stopping any running daemons..."
+ /usr/local/bin/searchd --config /usr/local/etc/sphinx.conf --stop 2> /dev/null
+ echo "Starting sphinx search daemon..."
+ /usr/local/bin/searchd --config /usr/local/etc/sphinx.conf 2> /dev/null
+fi
+
+if [[ $1 = "stop" ]]
+then
+ echo "Stopping sphinx search daemon..."
+ /usr/local/bin/searchd --config /usr/local/etc/sphinx.conf --stop 2> /dev/null
+fi
diff --git a/plugins/SphinxSearch/sphinx.conf.sample b/plugins/SphinxSearch/sphinx.conf.sample
new file mode 100644
index 000000000..3de62f637
--- /dev/null
+++ b/plugins/SphinxSearch/sphinx.conf.sample
@@ -0,0 +1,71 @@
+#
+# Minimal Sphinx configuration sample for statusnet
+#
+# Replace USERNAME, PASSWORD and DIRECTORY with your own values.
+#
+
+# Profile (people) data source
+source src1
+{
+ type = mysql
+ sql_host = localhost
+ sql_user = USERNAME
+ sql_pass = PASSWORD
+ sql_db = identi_ca
+ sql_port = 3306
+ sql_query = SELECT id, UNIX_TIMESTAMP(created) as created_ts, nickname, fullname, location, bio, homepage FROM profile
+ sql_query_info = SELECT * FROM profile where id = $id
+ sql_attr_timestamp = created_ts
+}
+
+
+# Notice data source
+source src2
+{
+ type = mysql
+ sql_host = localhost
+ sql_user = USERNAME
+ sql_pass = PASSWORD
+ sql_db = identi_ca
+ sql_port = 3306
+ sql_query = SELECT id, UNIX_TIMESTAMP(created) as created_ts, content FROM notice
+ sql_query_info = SELECT * FROM notice where notice.id = $id AND notice.is_local != -2
+ sql_attr_timestamp = created_ts
+}
+
+# Index names must be "<database name>_<table>": that is the name the
+# SphinxSearch class queries and the name scripts/gen_config.php generates.
+# For the database "identi_ca" this gives identi_ca_notice and
+# identi_ca_profile.
+index identi_ca_notice
+{
+ source = src2
+ path = DIRECTORY/data/identi_ca_notice
+ docinfo = extern
+ charset_type = utf-8
+ min_word_len = 3
+ stopwords = DIRECTORY/data/stopwords-en.txt
+}
+
+
+index identi_ca_profile
+{
+ source = src1
+ path = DIRECTORY/data/identi_ca_profile
+ docinfo = extern
+ charset_type = utf-8
+ min_word_len = 3
+ stopwords = DIRECTORY/data/stopwords-en.txt
+}
+
+indexer
+{
+ mem_limit = 32M
+}
+
+searchd
+{
+ port = 3312
+ log = DIRECTORY/log/searchd.log
+ query_log = DIRECTORY/log/query.log
+ read_timeout = 5
+ max_children = 30
+ pid_file = DIRECTORY/log/searchd.pid
+ max_matches = 1000
+ seamless_rotate = 1
+ preopen_indexes = 0
+ unlink_old = 1
+}
+
diff --git a/plugins/SphinxSearch/sphinxsearch.php b/plugins/SphinxSearch/sphinxsearch.php
new file mode 100644
index 000000000..71f330828
--- /dev/null
+++ b/plugins/SphinxSearch/sphinxsearch.php
@@ -0,0 +1,96 @@
+<?php
+/*
+ * StatusNet - the distributed open-source microblogging tool
+ * Copyright (C) 2008, 2009, StatusNet, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// Stop immediately unless the STATUSNET constant has been defined.
+if (!defined('STATUSNET')) {
+ exit(1);
+}
+
+/**
+ * Search engine backed by a Sphinx searchd server.
+ *
+ * Sphinx is asked for the matching document ids; those ids are then added
+ * as an "id in (...)" condition on the target object.
+ */
+class SphinxSearch extends SearchEngine
+{
+    /** SphinxClient instance; only set when the server was reachable. */
+    private $sphinx;
+    /** Whether the configured sphinx server accepted a socket connection. */
+    private $connected;
+
+    /**
+     * Probe the configured sphinx server and, when reachable, set up the client.
+     *
+     * When the probe fails, the parent constructor is not called and
+     * is_connected() reports false.
+     *
+     * @param object $target object handed on to the parent constructor
+     * @param string $table  table name handed on to the parent constructor
+     */
+    function __construct($target, $table)
+    {
+        // Plain socket probe; errors are suppressed so an unreachable
+        // server simply leaves the engine disconnected.
+        $fp = @fsockopen(common_config('sphinx', 'server'), common_config('sphinx', 'port'));
+        if (!$fp) {
+            $this->connected = false;
+            return;
+        }
+        fclose($fp);
+        parent::__construct($target, $table);
+        $this->sphinx = new SphinxClient;
+        $this->sphinx->setServer(common_config('sphinx', 'server'), common_config('sphinx', 'port'));
+        $this->connected = true;
+    }
+
+    /**
+     * @return boolean true when the constructor could reach the sphinx server
+     */
+    function is_connected()
+    {
+        return $this->connected;
+    }
+
+    /**
+     * Apply paging limits to the sphinx client and to the target.
+     *
+     * @param int     $offset offset passed to sphinx
+     * @param int     $count  number of results per page
+     * @param boolean $rss    when true, max matches is $count instead of 50 * ($count - 1)
+     *
+     * @return mixed result of $this->target->limit(0, $count)
+     */
+    function limit($offset, $count, $rss = false)
+    {
+        //FIXME without LARGEST_POSSIBLE, the most recent results aren't returned
+        // this probably has a large impact on performance
+        $LARGEST_POSSIBLE = 1e6;
+
+        if ($rss) {
+            $this->sphinx->setLimits($offset, $count, $count, $LARGEST_POSSIBLE);
+        }
+        else {
+            // return at most 50 pages of results
+            $this->sphinx->setLimits($offset, $count, 50 * ($count - 1), $LARGEST_POSSIBLE);
+        }
+
+        // The offset is applied on the sphinx side, so the target starts at 0.
+        return $this->target->limit(0, $count);
+    }
+
+    /**
+     * Run the query against sphinx and restrict the target to the matching ids.
+     *
+     * @param string $q search query
+     *
+     * @return boolean false when sphinx returned no matches (missing or
+     *                 empty 'matches'), true once the id filter was added
+     */
+    function query($q)
+    {
+        $result = $this->sphinx->query($q, $this->remote_table());
+        // empty() rather than isset(): an empty match list would otherwise
+        // build the invalid SQL fragment "id in ()".
+        if (empty($result['matches'])) {
+            return false;
+        }
+        $id_set = join(', ', array_keys($result['matches']));
+        $this->target->whereAdd("id in ($id_set)");
+        return true;
+    }
+
+    /**
+     * Set the sort order; only the 'chron' mode is handled.
+     *
+     * @param string $mode sort mode name
+     *
+     * @return mixed result of $this->target->orderBy() for 'chron'; nothing otherwise
+     */
+    function set_sort_mode($mode)
+    {
+        if ('chron' === $mode) {
+            $this->sphinx->SetSortMode(SPH_SORT_ATTR_DESC, 'created_ts');
+            return $this->target->orderBy('created desc');
+        }
+    }
+
+    /**
+     * @return string name of the sphinx index to query: "<dbname>_<table>"
+     */
+    function remote_table()
+    {
+        return $this->dbname() . '_' . $this->table;
+    }
+
+    /**
+     * Extract the database name from the 'db'/'database' connection string.
+     *
+     * @return string database name
+     *
+     * @throws ServerException when the connection string does not match
+     */
+    function dbname()
+    {
+        // @fixme there should be a less dreadful way to do this.
+        // DB objects won't give database back until they connect, it's confusing
+        if (preg_match('!^.*?://.*?:.*?@.*?/(.*?)$!', common_config('db', 'database'), $matches)) {
+            return $matches[1];
+        }
+        throw new ServerException("Sphinx search could not identify database name");
+    }
+}