From 53c86c43c4b8cba313335f5d70f7f77d4ab640d2 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 3 Nov 2009 16:57:39 -0800 Subject: Bringing Sphinx search support up to code: broken out to a plugin, now supports multiple sites on a single server. Upgrade notes: * Index names have changed from hardcoded 'Identica_people' and 'Identica_notices' to use the database name and actual table names. Must reindex. New events: * GetSearchEngine to override default search engine class selection from plugins New scripts: * gen_config.php generates a sphinx.conf from database configuration (with theoretical support for status_network table, but it doesn't seem to be cleanly queriable right now without knowing the db setup info for that. Needs generalized support.) * Replaced old sphinx-indexer.sh and sphinx-cron.sh with index_update.php Other fixes: * sphinx.conf.sample better matches our live config, skipping unused stopword list and using a more realistic indexer memory limit Further notes: * Probably doesn't work right with PostgreSQL yet; Sphinx can pull from PG but the extraction queries currently look like they use some MySQL-specific functions. --- lib/default.php | 4 --- lib/search_engines.php | 71 +++++--------------------------------------------- 2 files changed, 6 insertions(+), 69 deletions(-) (limited to 'lib') diff --git a/lib/default.php b/lib/default.php index f6cc4b725..95366e0b3 100644 --- a/lib/default.php +++ b/lib/default.php @@ -125,10 +125,6 @@ $default = 'public' => array()), # JIDs of users who want to receive the public stream 'invite' => array('enabled' => true), - 'sphinx' => - array('enabled' => false, - 'server' => 'localhost', - 'port' => 3312), 'tag' => array('dropoff' => 864000.0), 'popular' => diff --git a/lib/search_engines.php b/lib/search_engines.php index 69f6ff468..332db3f89 100644 --- a/lib/search_engines.php +++ b/lib/search_engines.php @@ -46,70 +46,11 @@ class SearchEngine } } -class SphinxSearch extends SearchEngine -{ - private $sphinx; - private $connected; - - function __construct($target, $table) - { - $fp = @fsockopen(common_config('sphinx', 'server'), common_config('sphinx', 'port')); - if (!$fp) { - $this->connected = false; - return; - } - fclose($fp); - parent::__construct($target, $table); - $this->sphinx = new SphinxClient; - $this->sphinx->setServer(common_config('sphinx', 'server'), common_config('sphinx', 'port')); - $this->connected = true; - } - - function is_connected() - { - return $this->connected; - } - - function limit($offset, $count, $rss = false) - { - //FIXME without LARGEST_POSSIBLE, the most recent results aren't returned - // this probably has a large impact on performance - $LARGEST_POSSIBLE = 1e6; - - if ($rss) { - $this->sphinx->setLimits($offset, $count, $count, $LARGEST_POSSIBLE); - } - else { - // return at most 50 pages of results - $this->sphinx->setLimits($offset, $count, 50 * ($count - 1), $LARGEST_POSSIBLE); - } - - return $this->target->limit(0, $count); - } - - function query($q) - { - $result = $this->sphinx->query($q, $this->table); - if (!isset($result['matches'])) return false; - $id_set = join(', ', array_keys($result['matches'])); - $this->target->whereAdd("id in ($id_set)"); - return true; - } - - function set_sort_mode($mode) - { - if ('chron' === $mode) { - $this->sphinx->SetSortMode(SPH_SORT_ATTR_DESC, 'created_ts'); - return $this->target->orderBy('created desc'); - } - } -} - class MySQLSearch extends SearchEngine { function query($q) { - if ('identica_people' === $this->table) { + if ('profile' === $this->table) { $this->target->whereAdd('MATCH(nickname, fullname, location, bio, homepage) ' . 'AGAINST (\''.addslashes($q).'\' IN BOOLEAN MODE)'); if (strtolower($q) != $q) { @@ -117,7 +58,7 @@ class MySQLSearch extends SearchEngine 'AGAINST (\''.addslashes(strtolower($q)).'\' IN BOOLEAN MODE)', 'OR'); } return true; - } else if ('identica_notices' === $this->table) { + } else if ('notice' === $this->table) { // Don't show imported notices $this->target->whereAdd('notice.is_local != ' . Notice::GATEWAY); @@ -143,13 +84,13 @@ class MySQLLikeSearch extends SearchEngine { function query($q) { - if ('identica_people' === $this->table) { + if ('profile' === $this->table) { $qry = sprintf('(nickname LIKE "%%%1$s%%" OR '. ' fullname LIKE "%%%1$s%%" OR '. ' location LIKE "%%%1$s%%" OR '. ' bio LIKE "%%%1$s%%" OR '. ' homepage LIKE "%%%1$s%%")', addslashes($q)); - } else if ('identica_notices' === $this->table) { + } else if ('notice' === $this->table) { $qry = sprintf('content LIKE "%%%1$s%%"', addslashes($q)); } else { throw new ServerException('Unknown table: ' . $this->table); @@ -165,9 +106,9 @@ class PGSearch extends SearchEngine { function query($q) { - if ('identica_people' === $this->table) { + if ('profile' === $this->table) { return $this->target->whereAdd('textsearch @@ plainto_tsquery(\''.addslashes($q).'\')'); - } else if ('identica_notices' === $this->table) { + } else if ('notice' === $this->table) { // XXX: We need to filter out gateway notices (notice.is_local = -2) --Zach -- cgit v1.2.3-54-g00ecf