diff options
author | Brion Vibber <brion@pobox.com> | 2009-11-03 16:57:39 -0800 |
---|---|---|
committer | Brion Vibber <brion@pobox.com> | 2009-11-10 13:44:40 -0800 |
commit | 53c86c43c4b8cba313335f5d70f7f77d4ab640d2 (patch) | |
tree | 96de2e9f0793685e35702946ec8539edbcf80921 /plugins/SphinxSearch/sphinxsearch.php | |
parent | 1cd6650ae43d548f209d68e9feaaa7185d5ffecb (diff) |
Bringing Sphinx search support up to code: broken out to a plugin, now supports multiple sites on a single server.
Upgrade notes:
* Index names have changed from hardcoded 'Identica_people' and 'Identica_notices' to use the database name and actual table names. Must reindex.
New events:
* GetSearchEngine to override default search engine class selection from plugins
New scripts:
* gen_config.php generates a sphinx.conf from the database configuration (with theoretical support for the status_network table, but that table doesn't seem to be cleanly queryable right now without already knowing its database setup info; this needs generalized support.)
* Replaced old sphinx-indexer.sh and sphinx-cron.sh with index_update.php
Other fixes:
* sphinx.conf.sample better matches our live config, skipping unused stopword list and using a more realistic indexer memory limit
Further notes:
* Probably doesn't work right with PostgreSQL yet; Sphinx can pull from PG but the extraction queries currently look like they use some MySQL-specific functions.
Diffstat (limited to 'plugins/SphinxSearch/sphinxsearch.php')
-rw-r--r-- | plugins/SphinxSearch/sphinxsearch.php | 96 |
1 files changed, 96 insertions, 0 deletions
diff --git a/plugins/SphinxSearch/sphinxsearch.php b/plugins/SphinxSearch/sphinxsearch.php new file mode 100644 index 000000000..71f330828 --- /dev/null +++ b/plugins/SphinxSearch/sphinxsearch.php @@ -0,0 +1,96 @@ +<?php +/* + * StatusNet - the distributed open-source microblogging tool + * Copyright (C) 2008, 2009, StatusNet, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/

if (!defined('STATUSNET')) {
    exit(1);
}

/**
 * Search engine implementation that asks a Sphinx daemon for matching ids
 * and then restricts the target query object to those ids.
 *
 * Callers should check is_connected() before calling the other methods:
 * when the connection probe in the constructor fails, neither the parent
 * constructor nor the Sphinx client is set up.
 */
class SphinxSearch extends SearchEngine
{
    /** @var SphinxClient|null Sphinx client; only created when the probe succeeds */
    private $sphinx = null;

    /** @var bool Whether the constructor's connection probe succeeded */
    private $connected = false;

    /**
     * Probe the configured Sphinx server and, if reachable, set up the client.
     *
     * @param mixed $target forwarded to the parent constructor
     * @param mixed $table  forwarded to the parent constructor
     */
    function __construct($target, $table)
    {
        // Read the endpoint once; it is used for both the probe and the client.
        $server = common_config('sphinx', 'server');
        $port   = common_config('sphinx', 'port');

        // Throwaway socket used purely as a reachability check. Warnings are
        // suppressed so an unreachable daemon only marks us as disconnected.
        $fp = @fsockopen($server, $port);
        if (!$fp) {
            $this->connected = false;
            return;
        }
        fclose($fp);

        parent::__construct($target, $table);
        $this->sphinx = new SphinxClient;
        $this->sphinx->setServer($server, $port);
        $this->connected = true;
    }

    /**
     * @return bool true when the connection probe in the constructor succeeded
     */
    function is_connected()
    {
        return $this->connected;
    }

    /**
     * Apply paging limits to the Sphinx client and to the target query.
     *
     * @param int  $offset offset handed to Sphinx
     * @param int  $count  number of results requested
     * @param bool $rss    when true, the Sphinx max-matches value is $count;
     *                     otherwise it is 50 * ($count - 1)
     *
     * @return mixed result of $this->target->limit(0, $count)
     */
    function limit($offset, $count, $rss = false)
    {
        //FIXME without LARGEST_POSSIBLE, the most recent results aren't returned
        //      this probably has a large impact on performance
        // Integer literal (was the float 1e6) since it is passed as a cutoff count.
        $LARGEST_POSSIBLE = 1000000;

        if ($rss) {
            $max_matches = $count;
        } else {
            // return at most 50 pages of results
            $max_matches = 50 * ($count - 1);
        }
        $this->sphinx->setLimits($offset, $count, $max_matches, $LARGEST_POSSIBLE);

        // The offset is applied on the Sphinx side, so the target starts at 0.
        return $this->target->limit(0, $count);
    }

    /**
     * Run the query against the Sphinx index named by remote_table() and
     * restrict the target to the matching ids.
     *
     * @param string $q query text passed to Sphinx
     *
     * @return bool false when the result carries no matches (failed query,
     *              missing or empty match list); true once the id filter
     *              has been added to the target
     */
    function query($q)
    {
        $result = $this->sphinx->query($q, $this->remote_table());

        // empty() also rejects an empty match list, which would otherwise
        // yield the invalid SQL fragment "id in ()".
        if (empty($result['matches'])) {
            return false;
        }

        // Match ids are the array keys of the 'matches' entry.
        $id_set = join(', ', array_keys($result['matches']));
        $this->target->whereAdd("id in ($id_set)");
        return true;
    }

    /**
     * Select the sort order. Only 'chron' is handled; any other mode leaves
     * both the Sphinx client and the target untouched and returns null.
     *
     * @param string $mode sort mode name
     *
     * @return mixed result of $this->target->orderBy() for 'chron', else null
     */
    function set_sort_mode($mode)
    {
        if ('chron' === $mode) {
            $this->sphinx->setSortMode(SPH_SORT_ATTR_DESC, 'created_ts');
            return $this->target->orderBy('created desc');
        }
    }

    /**
     * @return string Sphinx index name: database name, underscore, table name
     */
    function remote_table()
    {
        return $this->dbname() . '_' . $this->table;
    }

    /**
     * Extract the database name from the configured DSN.
     *
     * @return string database name
     *
     * @throws ServerException when no database name can be identified
     */
    function dbname()
    {
        $dsn = common_config('db', 'database');

        // @fixme there should be a less dreadful way to do this.
        // DB objects won't give database back until they connect, it's confusing
        if (preg_match('!^.*?://.*?:.*?@.*?/(.*?)$!', $dsn, $matches)) {
            return $matches[1];
        }

        // Fallback for DSNs the pattern above rejects because they carry no
        // "user:password@" part (e.g. scheme://user@host/db). The original
        // pattern is tried first so previously accepted DSNs are unaffected.
        $path = is_string($dsn) ? parse_url($dsn, PHP_URL_PATH) : null;
        if (is_string($path)) {
            $name = ltrim($path, '/');
            if ($name !== '') {
                return $name;
            }
        }

        throw new ServerException("Sphinx search could not identify database name");
    }
}