summaryrefslogtreecommitdiff
path: root/includes/search/SearchEngine.php
diff options
context:
space:
mode:
Diffstat (limited to 'includes/search/SearchEngine.php')
-rw-r--r--includes/search/SearchEngine.php169
1 files changed, 125 insertions, 44 deletions
diff --git a/includes/search/SearchEngine.php b/includes/search/SearchEngine.php
index 6b3e62b1..71c05d8b 100644
--- a/includes/search/SearchEngine.php
+++ b/includes/search/SearchEngine.php
@@ -59,7 +59,7 @@ class SearchEngine {
* STUB
*
* @param string $term raw search term
- * @return SearchResultSet
+ * @return SearchResultSet|Status|null
*/
function searchText( $term ) {
return null;
@@ -71,7 +71,7 @@ class SearchEngine {
* STUB
*
* @param string $term raw search term
- * @return SearchResultSet
+ * @return SearchResultSet|null
*/
function searchTitle( $term ) {
return null;
@@ -93,8 +93,9 @@ class SearchEngine {
* @return Boolean
*/
public function supports( $feature ) {
- switch( $feature ) {
+ switch ( $feature ) {
case 'list-redirects':
+ case 'search-update':
return true;
case 'title-suffix-filter':
default:
@@ -331,8 +332,9 @@ class SearchEngine {
$parsed = substr( $query, strlen( $prefix ) + 1 );
}
}
- if ( trim( $parsed ) == '' )
+ if ( trim( $parsed ) == '' ) {
$parsed = $query; // prefix was the whole query
+ }
wfRunHooks( 'SearchEngineReplacePrefixesComplete', array( $this, $query, &$parsed ) );
@@ -420,8 +422,9 @@ class SearchEngine {
$formatted = array_map( array( $wgContLang, 'getFormattedNsText' ), $namespaces );
foreach ( $formatted as $key => $ns ) {
- if ( empty( $ns ) )
+ if ( empty( $ns ) ) {
$formatted[$key] = wfMessage( 'blanknamespace' )->text();
+ }
}
return $formatted;
}
@@ -451,23 +454,46 @@ class SearchEngine {
* Load up the appropriate search engine class for the currently
* active database backend, and return a configured instance.
*
+ * @param String $type Type of search backend, if not the default
* @return SearchEngine
*/
- public static function create() {
+ public static function create( $type = null ) {
global $wgSearchType;
$dbr = null;
- if ( $wgSearchType ) {
+
+ $alternatives = self::getSearchTypes();
+
+ if ( $type && in_array( $type, $alternatives ) ) {
+ $class = $type;
+ } elseif ( $wgSearchType !== null ) {
$class = $wgSearchType;
} else {
$dbr = wfGetDB( DB_SLAVE );
$class = $dbr->getSearchEngine();
}
+
$search = new $class( $dbr );
$search->setLimitOffset( 0, 0 );
return $search;
}
/**
+ * Return the search engines we support. If only $wgSearchType
+ * is set, it'll be an array of just that one item.
+ *
+ * @return array
+ */
+ public static function getSearchTypes() {
+ global $wgSearchType, $wgSearchTypeAlternatives;
+ static $alternatives = null;
+ if ( $alternatives === null ) {
+ $alternatives = $wgSearchTypeAlternatives ?: array();
+ array_unshift( $alternatives, $wgSearchType );
+ }
+ return $alternatives;
+ }
+
+ /**
* Create or update the search index record for the given page.
* Title and text should be pre-processed.
* STUB
@@ -493,6 +519,18 @@ class SearchEngine {
}
/**
+ * Delete an indexed page
+ * Title should be pre-processed.
+ * STUB
+ *
+ * @param Integer $id Page id that was deleted
+ * @param String $title Title of page that was deleted
+ */
+ function delete( $id, $title ) {
+ // no-op
+ }
+
+ /**
* Get OpenSearch suggestion template
*
* @return String
@@ -509,6 +547,31 @@ class SearchEngine {
return $wgCanonicalServer . wfScript( 'api' ) . '?action=opensearch&search={searchTerms}&namespace=' . $ns;
}
}
+
+ /**
+ * Get the raw text for updating the index from a content object
+ * Nicer search backends could possibly do something cooler than
+ * just returning raw text
+ *
+ * @todo This isn't ideal, we'd really like to have content-specific handling here
+ * @param Title $t Title we're indexing
+ * @param Content $c Content of the page to index
+ * @return string
+ */
+ public function getTextFromContent( Title $t, Content $c = null ) {
+ return $c ? $c->getTextForSearchIndex() : '';
+ }
+
+ /**
+ * If an implementation of SearchEngine handles all of its own text processing
+ * in getTextFromContent() and doesn't require SearchUpdate::updateText()'s
+ * rather silly handling, it should return true here instead.
+ *
+ * @return bool
+ */
+ public function textAlreadyUpdatedForIndex() {
+ return false;
+ }
}
/**
@@ -641,26 +704,30 @@ class SqlSearchResultSet extends SearchResultSet {
}
function numRows() {
- if ( $this->mResultSet === false )
+ if ( $this->mResultSet === false ) {
return false;
+ }
return $this->mResultSet->numRows();
}
function next() {
- if ( $this->mResultSet === false )
+ if ( $this->mResultSet === false ) {
return false;
+ }
$row = $this->mResultSet->fetchObject();
- if ( $row === false )
+ if ( $row === false ) {
return false;
+ }
return SearchResult::newFromRow( $row );
}
function free() {
- if ( $this->mResultSet === false )
+ if ( $this->mResultSet === false ) {
return false;
+ }
$this->mResultSet->free();
}
@@ -750,8 +817,9 @@ class SearchResult {
wfRunHooks( 'SearchResultInitFromTitle', array( $title, &$id ) );
$this->mRevision = Revision::newFromTitle(
$this->mTitle, $id, Revision::READ_NORMAL );
- if ( $this->mTitle->getNamespace() === NS_FILE )
+ if ( $this->mTitle->getNamespace() === NS_FILE ) {
$this->mImage = wfFindFile( $this->mTitle );
+ }
}
}
@@ -761,8 +829,9 @@ class SearchResult {
* @return Boolean
*/
function isBrokenTitle() {
- if ( is_null( $this->mTitle ) )
+ if ( is_null( $this->mTitle ) ) {
return true;
+ }
return false;
}
@@ -795,10 +864,8 @@ class SearchResult {
protected function initText() {
if ( !isset( $this->mText ) ) {
if ( $this->mRevision != null ) {
- //TODO: if we could plug in some code that knows about special content models *and* about
- // special features of the search engine, the search could benefit.
- $content = $this->mRevision->getContent();
- $this->mText = $content ? $content->getTextForSearchIndex() : '';
+ $this->mText = SearchEngine::create()
+ ->getTextFromContent( $this->mTitle, $this->mRevision->getContent() );
} else { // TODO: can we fetch raw wikitext for commons images?
$this->mText = '';
}
@@ -810,16 +877,17 @@ class SearchResult {
* @return String: highlighted text snippet, null (and not '') if not supported
*/
function getTextSnippet( $terms ) {
- global $wgUser, $wgAdvancedSearchHighlighting;
+ global $wgAdvancedSearchHighlighting;
$this->initText();
// TODO: make highliter take a content object. Make ContentHandler a factory for SearchHighliter.
- list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs( $wgUser );
+ list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs();
$h = new SearchHighlighter();
- if ( $wgAdvancedSearchHighlighting )
+ if ( $wgAdvancedSearchHighlighting ) {
return $h->highlightText( $this->mText, $terms, $contextlines, $contextchars );
- else
+ } else {
return $h->highlightSimple( $this->mText, $terms, $contextlines, $contextchars );
+ }
}
/**
@@ -863,10 +931,11 @@ class SearchResult {
* @return String: timestamp
*/
function getTimestamp() {
- if ( $this->mRevision )
+ if ( $this->mRevision ) {
return $this->mRevision->getTimestamp();
- elseif ( $this->mImage )
+ } elseif ( $this->mImage ) {
return $this->mImage->getTimestamp();
+ }
return '';
}
@@ -952,8 +1021,9 @@ class SearchHighlighter {
global $wgSearchHighlightBoundaries;
$fname = __METHOD__;
- if ( $text == '' )
+ if ( $text == '' ) {
return '';
+ }
// spli text into text + templates/links/tables
$spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
@@ -984,8 +1054,9 @@ class SearchHighlighter {
if ( $key == 2 ) {
// see if this is an image link
$ns = substr( $val[0], 2, - 1 );
- if ( $wgContLang->getNsIndex( $ns ) != NS_FILE )
+ if ( $wgContLang->getNsIndex( $ns ) != NS_FILE ) {
break;
+ }
}
$epat = $endPatterns[$key];
@@ -1006,7 +1077,7 @@ class SearchHighlighter {
$len = strlen( $endMatches[2][0] );
$off = $endMatches[2][1];
$this->splitAndAdd( $otherExt, $count,
- substr( $text, $start, $off + $len - $start ) );
+ substr( $text, $start, $off + $len - $start ) );
$start = $off + $len;
$found = true;
break;
@@ -1119,7 +1190,7 @@ class SearchHighlighter {
// if begin of the article contains the whole phrase, show only that !!
if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] )
&& $offsets[$first] < $contextchars * 2 ) {
- $snippets = array ( $first => $snippets[$first] );
+ $snippets = array( $first => $snippets[$first] );
}
// calc by how much to extend existing snippets
@@ -1155,17 +1226,19 @@ class SearchHighlighter {
$last = - 1;
$extract = '';
foreach ( $snippets as $index => $line ) {
- if ( $last == - 1 )
+ if ( $last == - 1 ) {
$extract .= $line; // first line
- elseif ( $last + 1 == $index && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] ) )
+ } elseif ( $last + 1 == $index && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] ) ) {
$extract .= " " . $line; // continous lines
- else
+ } else {
$extract .= '<b> ... </b>' . $line;
+ }
$last = $index;
}
- if ( $extract )
+ if ( $extract ) {
$extract .= '<b> ... </b>';
+ }
$processed = array();
foreach ( $terms as $term ) {
@@ -1193,8 +1266,9 @@ class SearchHighlighter {
$split = explode( "\n", $this->mCleanWikitext ? $this->removeWiki( $text ) : $text );
foreach ( $split as $line ) {
$tt = trim( $line );
- if ( $tt )
+ if ( $tt ) {
$extracts[$count++] = $tt;
+ }
}
}
@@ -1268,8 +1342,9 @@ class SearchHighlighter {
while ( $char >= 0x80 && $char < 0xc0 ) {
// skip trailing bytes
$point++;
- if ( $point >= strlen( $text ) )
+ if ( $point >= strlen( $text ) ) {
return strlen( $text );
+ }
$char = ord( $text[$point] );
}
return $point;
@@ -1289,24 +1364,28 @@ class SearchHighlighter {
* @protected
*/
function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ) {
- if ( $linesleft == 0 )
+ if ( $linesleft == 0 ) {
return; // nothing to do
+ }
foreach ( $extracts as $index => $line ) {
- if ( array_key_exists( $index, $out ) )
+ if ( array_key_exists( $index, $out ) ) {
continue; // this line already highlighted
+ }
$m = array();
- if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) )
+ if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) ) {
continue;
+ }
$offset = $m[0][1];
$len = strlen( $m[0][0] );
- if ( $offset + $len < $contextchars )
+ if ( $offset + $len < $contextchars ) {
$begin = 0;
- elseif ( $len > $contextchars )
+ } elseif ( $len > $contextchars ) {
$begin = $offset;
- else
+ } else {
$begin = $offset + intval( ( $len - $contextchars ) / 2 );
+ }
$end = $begin + $contextchars;
@@ -1315,8 +1394,9 @@ class SearchHighlighter {
$out[$index] = $this->extract( $line, $begin, $end, $posBegin );
$offsets[$index] = $posBegin;
$linesleft--;
- if ( $linesleft == 0 )
+ if ( $linesleft == 0 ) {
return;
+ }
}
}
@@ -1357,16 +1437,17 @@ class SearchHighlighter {
*/
function linkReplace( $matches ) {
$colon = strpos( $matches[1], ':' );
- if ( $colon === false )
+ if ( $colon === false ) {
return $matches[2]; // replace with caption
+ }
global $wgContLang;
$ns = substr( $matches[1], 0, $colon );
$index = $wgContLang->getNsIndex( $ns );
- if ( $index !== false && ( $index == NS_FILE || $index == NS_CATEGORY ) )
+ if ( $index !== false && ( $index == NS_FILE || $index == NS_CATEGORY ) ) {
return $matches[0]; // return the whole thing
- else
+ } else {
return $matches[2];
-
+ }
}
/**