From 53c86c43c4b8cba313335f5d70f7f77d4ab640d2 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 3 Nov 2009 16:57:39 -0800 Subject: Bringing Sphinx search support up to code: broken out to a plugin, now supports multiple sites on a single server. Upgrade notes: * Index names have changed from hardcoded 'Identica_people' and 'Identica_notices' to use the database name and actual table names. Must reindex. New events: * GetSearchEngine to override default search engine class selection from plugins New scripts: * gen_config.php generates a sphinx.conf from database configuration (with theoretical support for status_network table, but it doesn't seem to be cleanly queriable right now without knowing the db setup info for that. Needs generalized support.) * Replaced old sphinx-indexer.sh and sphinx-cron.sh with index_update.php Other fixes: * sphinx.conf.sample better matches our live config, skipping unused stopword list and using a more realistic indexer memory limit Further notes: * Probably doesn't work right with PostgreSQL yet; Sphinx can pull from PG but the extraction queries currently look like they use some MySQL-specific functions. --- plugins/SphinxSearch/sphinx.conf.sample | 71 +++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100644 plugins/SphinxSearch/sphinx.conf.sample (limited to 'plugins/SphinxSearch/sphinx.conf.sample') diff --git a/plugins/SphinxSearch/sphinx.conf.sample b/plugins/SphinxSearch/sphinx.conf.sample new file mode 100644 index 000000000..3de62f637 --- /dev/null +++ b/plugins/SphinxSearch/sphinx.conf.sample @@ -0,0 +1,71 @@ +# +# Minimal Sphinx configuration sample for statusnet +# + +source src1 +{ + type = mysql + sql_host = localhost + sql_user = USERNAME + sql_pass = PASSWORD + sql_db = identi_ca + sql_port = 3306 + sql_query = SELECT id, UNIX_TIMESTAMP(created) as created_ts, nickname, fullname, location, bio, homepage FROM profile + sql_query_info = SELECT * FROM profile where id = $id + sql_attr_timestamp = created_ts +} + + +source src2 +{ + type = mysql + sql_host = localhost + sql_user = USERNAME + sql_pass = PASSWORD + sql_db = identi_ca + sql_port = 3306 + sql_query = SELECT id, UNIX_TIMESTAMP(created) as created_ts, content FROM notice + sql_query_info = SELECT * FROM notice where notice.id = $id AND notice.is_local != -2 + sql_attr_timestamp = created_ts +} + +index identica_notices +{ + source = src2 + path = DIRECTORY/data/identica_notices + docinfo = extern + charset_type = utf-8 + min_word_len = 3 + stopwords = DIRECTORY/data/stopwords-en.txt +} + + +index identica_people +{ + source = src1 + path = DIRECTORY/data/identica_people + docinfo = extern + charset_type = utf-8 + min_word_len = 3 + stopwords = DIRECTORY/data/stopwords-en.txt +} + +indexer +{ + mem_limit = 32M +} + +searchd +{ + port = 3312 + log = DIRECTORY/log/searchd.log + query_log = DIRECTORY/log/query.log + read_timeout = 5 + max_children = 30 + pid_file = DIRECTORY/log/searchd.pid + max_matches = 1000 + seamless_rotate = 1 + preopen_indexes = 0 + unlink_old = 1 +} + -- cgit v1.2.3-54-g00ecf