From 53c86c43c4b8cba313335f5d70f7f77d4ab640d2 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 3 Nov 2009 16:57:39 -0800 Subject: Bringing Sphinx search support up to code: broken out to a plugin, now supports multiple sites on a single server. Upgrade notes: * Index names have changed from hardcoded 'Identica_people' and 'Identica_notices' to use the database name and actual table names. Must reindex. New events: * GetSearchEngine to override default search engine class selection from plugins New scripts: * gen_config.php generates a sphinx.conf from database configuration (with theoretical support for status_network table, but it doesn't seem to be cleanly queriable right now without knowing the db setup info for that. Needs generalized support.) * Replaced old sphinx-indexer.sh and sphinx-cron.sh with index_update.php Other fixes: * sphinx.conf.sample better matches our live config, skipping unused stopword list and using a more realistic indexer memory limit Further notes: * Probably doesn't work right with PostgreSQL yet; Sphinx can pull from PG but the extraction queries currently look like they use some MySQL-specific functions. --- plugins/SphinxSearch/sphinxsearch.php | 96 +++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 plugins/SphinxSearch/sphinxsearch.php (limited to 'plugins/SphinxSearch/sphinxsearch.php') diff --git a/plugins/SphinxSearch/sphinxsearch.php b/plugins/SphinxSearch/sphinxsearch.php new file mode 100644 index 000000000..71f330828 --- /dev/null +++ b/plugins/SphinxSearch/sphinxsearch.php @@ -0,0 +1,96 @@ +. + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +class SphinxSearch extends SearchEngine +{ + private $sphinx; + private $connected; + + function __construct($target, $table) + { + $fp = @fsockopen(common_config('sphinx', 'server'), common_config('sphinx', 'port')); + if (!$fp) { + $this->connected = false; + return; + } + fclose($fp); + parent::__construct($target, $table); + $this->sphinx = new SphinxClient; + $this->sphinx->setServer(common_config('sphinx', 'server'), common_config('sphinx', 'port')); + $this->connected = true; + } + + function is_connected() + { + return $this->connected; + } + + function limit($offset, $count, $rss = false) + { + //FIXME without LARGEST_POSSIBLE, the most recent results aren't returned + // this probably has a large impact on performance + $LARGEST_POSSIBLE = 1e6; + + if ($rss) { + $this->sphinx->setLimits($offset, $count, $count, $LARGEST_POSSIBLE); + } + else { + // return at most 50 pages of results + $this->sphinx->setLimits($offset, $count, 50 * ($count - 1), $LARGEST_POSSIBLE); + } + + return $this->target->limit(0, $count); + } + + function query($q) + { + $result = $this->sphinx->query($q, $this->remote_table()); + if (!isset($result['matches'])) return false; + $id_set = join(', ', array_keys($result['matches'])); + $this->target->whereAdd("id in ($id_set)"); + return true; + } + + function set_sort_mode($mode) + { + if ('chron' === $mode) { + $this->sphinx->SetSortMode(SPH_SORT_ATTR_DESC, 'created_ts'); + return $this->target->orderBy('created desc'); + } + } + + function remote_table() + { + return $this->dbname() . '_' . $this->table; + } + + function dbname() + { + // @fixme there should be a less dreadful way to do this. + // DB objects won't give database back until they connect, it's confusing + if (preg_match('!^.*?://.*?:.*?@.*?/(.*?)$!', common_config('db', 'database'), $matches)) { + return $matches[1]; + } + throw new ServerException("Sphinx search could not identify database name"); + } +} -- cgit v1.2.3-54-g00ecf