summaryrefslogtreecommitdiff
path: root/includes/search/SearchEngine.php
diff options
context:
space:
mode:
Diffstat (limited to 'includes/search/SearchEngine.php')
-rw-r--r--includes/search/SearchEngine.php244
1 files changed, 166 insertions, 78 deletions
diff --git a/includes/search/SearchEngine.php b/includes/search/SearchEngine.php
index 27a321ac..71c05d8b 100644
--- a/includes/search/SearchEngine.php
+++ b/includes/search/SearchEngine.php
@@ -45,7 +45,7 @@ class SearchEngine {
*/
protected $db;
- function __construct($db = null) {
+ function __construct( $db = null ) {
if ( $db ) {
$this->db = $db;
} else {
@@ -58,8 +58,8 @@ class SearchEngine {
* If title searches are not supported or disabled, return null.
* STUB
*
- * @param $term String: raw search term
- * @return SearchResultSet
+ * @param string $term raw search term
+ * @return SearchResultSet|Status|null
*/
function searchText( $term ) {
return null;
@@ -70,8 +70,8 @@ class SearchEngine {
* If title searches are not supported or disabled, return null.
* STUB
*
- * @param $term String: raw search term
- * @return SearchResultSet
+ * @param string $term raw search term
+ * @return SearchResultSet|null
*/
function searchTitle( $term ) {
return null;
@@ -93,8 +93,9 @@ class SearchEngine {
* @return Boolean
*/
public function supports( $feature ) {
- switch( $feature ) {
+ switch ( $feature ) {
case 'list-redirects':
+ case 'search-update':
return true;
case 'title-suffix-filter':
default:
@@ -118,7 +119,7 @@ class SearchEngine {
* on text to be used for searching or updating search index.
* Default implementation does nothing (simply returns $string).
*
- * @param $string string: String to process
+ * @param string $string String to process
* @return string
*/
public function normalizeText( $string ) {
@@ -163,7 +164,7 @@ class SearchEngine {
/**
* Really find the title match.
- * @return null|\Title
+ * @return null|Title
*/
private static function getNearMatchInternal( $searchterm ) {
global $wgContLang, $wgEnableSearchContributorsByIP;
@@ -183,10 +184,15 @@ class SearchEngine {
# Exact match? No need to look further.
$title = Title::newFromText( $term );
- if ( is_null( $title ) ){
+ if ( is_null( $title ) ) {
return null;
}
+ # Try files if searching in the Media: namespace
+ if ( $title->getNamespace() == NS_MEDIA ) {
+ $title = Title::makeTitle( NS_FILE, $title->getText() );
+ }
+
if ( $title->isSpecialPage() || $title->isExternal() || $title->exists() ) {
return $title;
}
@@ -197,22 +203,23 @@ class SearchEngine {
return $title;
}
+ if ( !wfRunHooks( 'SearchAfterNoDirectMatch', array( $term, &$title ) ) ) {
+ return $title;
+ }
+
# Now try all lower case (i.e. first letter capitalized)
- #
$title = Title::newFromText( $wgContLang->lc( $term ) );
if ( $title && $title->exists() ) {
return $title;
}
# Now try capitalized string
- #
$title = Title::newFromText( $wgContLang->ucwords( $term ) );
if ( $title && $title->exists() ) {
return $title;
}
# Now try all upper case
- #
$title = Title::newFromText( $wgContLang->uc( $term ) );
if ( $title && $title->exists() ) {
return $title;
@@ -233,7 +240,6 @@ class SearchEngine {
$title = Title::newFromText( $searchterm );
-
# Entering an IP address goes to the contributions page
if ( $wgEnableSearchContributorsByIP ) {
if ( ( $title->getNamespace() == NS_USER && User::isIP( $title->getText() ) )
@@ -242,7 +248,6 @@ class SearchEngine {
}
}
-
# Entering a user goes to the user page whether it's there or not
if ( $title->getNamespace() == NS_USER ) {
return $title;
@@ -327,8 +332,9 @@ class SearchEngine {
$parsed = substr( $query, strlen( $prefix ) + 1 );
}
}
- if ( trim( $parsed ) == '' )
+ if ( trim( $parsed ) == '' ) {
$parsed = $query; // prefix was the whole query
+ }
wfRunHooks( 'SearchEngineReplacePrefixesComplete', array( $this, $query, &$parsed ) );
@@ -416,8 +422,9 @@ class SearchEngine {
$formatted = array_map( array( $wgContLang, 'getFormattedNsText' ), $namespaces );
foreach ( $formatted as $key => $ns ) {
- if ( empty( $ns ) )
+ if ( empty( $ns ) ) {
$formatted[$key] = wfMessage( 'blanknamespace' )->text();
+ }
}
return $formatted;
}
@@ -447,23 +454,46 @@ class SearchEngine {
* Load up the appropriate search engine class for the currently
* active database backend, and return a configured instance.
*
+ * @param String $type Type of search backend, if not the default
* @return SearchEngine
*/
- public static function create() {
+ public static function create( $type = null ) {
global $wgSearchType;
$dbr = null;
- if ( $wgSearchType ) {
+
+ $alternatives = self::getSearchTypes();
+
+ if ( $type && in_array( $type, $alternatives ) ) {
+ $class = $type;
+ } elseif ( $wgSearchType !== null ) {
$class = $wgSearchType;
} else {
$dbr = wfGetDB( DB_SLAVE );
$class = $dbr->getSearchEngine();
}
+
$search = new $class( $dbr );
$search->setLimitOffset( 0, 0 );
return $search;
}
/**
+ * Return the search engines we support. If only $wgSearchType
+ * is set, it'll be an array of just that one item.
+ *
+ * @return array
+ */
+ public static function getSearchTypes() {
+ global $wgSearchType, $wgSearchTypeAlternatives;
+ static $alternatives = null;
+ if ( $alternatives === null ) {
+ $alternatives = $wgSearchTypeAlternatives ?: array();
+ array_unshift( $alternatives, $wgSearchType );
+ }
+ return $alternatives;
+ }
+
+ /**
* Create or update the search index record for the given page.
* Title and text should be pre-processed.
* STUB
@@ -489,6 +519,18 @@ class SearchEngine {
}
/**
+ * Delete an indexed page
+ * Title should be pre-processed.
+ * STUB
+ *
+ * @param Integer $id Page id that was deleted
+ * @param String $title Title of page that was deleted
+ */
+ function delete( $id, $title ) {
+ // no-op
+ }
+
+ /**
* Get OpenSearch suggestion template
*
* @return String
@@ -505,6 +547,31 @@ class SearchEngine {
return $wgCanonicalServer . wfScript( 'api' ) . '?action=opensearch&search={searchTerms}&namespace=' . $ns;
}
}
+
+ /**
+ * Get the raw text for updating the index from a content object
+ * Nicer search backends could possibly do something cooler than
+ * just returning raw text
+ *
+ * @todo This isn't ideal, we'd really like to have content-specific handling here
+ * @param Title $t Title we're indexing
+ * @param Content $c Content of the page to index
+ * @return string
+ */
+ public function getTextFromContent( Title $t, Content $c = null ) {
+ return $c ? $c->getTextForSearchIndex() : '';
+ }
+
+ /**
+ * If an implementation of SearchEngine handles all of its own text processing
+ * in getTextFromContent() and doesn't require SearchUpdate::updateText()'s
+ * rather silly handling, it should return true here instead.
+ *
+ * @return bool
+ */
+ public function textAlreadyUpdatedForIndex() {
+ return false;
+ }
}
/**
@@ -637,26 +704,30 @@ class SqlSearchResultSet extends SearchResultSet {
}
function numRows() {
- if ( $this->mResultSet === false )
+ if ( $this->mResultSet === false ) {
return false;
+ }
return $this->mResultSet->numRows();
}
function next() {
- if ( $this->mResultSet === false )
+ if ( $this->mResultSet === false ) {
return false;
+ }
$row = $this->mResultSet->fetchObject();
- if ( $row === false )
+ if ( $row === false ) {
return false;
+ }
return SearchResult::newFromRow( $row );
}
function free() {
- if ( $this->mResultSet === false )
+ if ( $this->mResultSet === false ) {
return false;
+ }
$this->mResultSet->free();
}
@@ -669,7 +740,6 @@ class SearchResultTooMany {
# # Some search engines may bail out if too many matches are found
}
-
/**
* @todo FIXME: This class is horribly factored. It would probably be better to
* have a useful base class to which you pass some standard information, then
@@ -747,8 +817,9 @@ class SearchResult {
wfRunHooks( 'SearchResultInitFromTitle', array( $title, &$id ) );
$this->mRevision = Revision::newFromTitle(
$this->mTitle, $id, Revision::READ_NORMAL );
- if ( $this->mTitle->getNamespace() === NS_FILE )
+ if ( $this->mTitle->getNamespace() === NS_FILE ) {
$this->mImage = wfFindFile( $this->mTitle );
+ }
}
}
@@ -758,8 +829,9 @@ class SearchResult {
* @return Boolean
*/
function isBrokenTitle() {
- if ( is_null( $this->mTitle ) )
+ if ( is_null( $this->mTitle ) ) {
return true;
+ }
return false;
}
@@ -791,31 +863,35 @@ class SearchResult {
*/
protected function initText() {
if ( !isset( $this->mText ) ) {
- if ( $this->mRevision != null )
- $this->mText = $this->mRevision->getText();
- else // TODO: can we fetch raw wikitext for commons images?
+ if ( $this->mRevision != null ) {
+ $this->mText = SearchEngine::create()
+ ->getTextFromContent( $this->mTitle, $this->mRevision->getContent() );
+ } else { // TODO: can we fetch raw wikitext for commons images?
$this->mText = '';
-
+ }
}
}
/**
- * @param $terms Array: terms to highlight
+ * @param array $terms terms to highlight
* @return String: highlighted text snippet, null (and not '') if not supported
*/
function getTextSnippet( $terms ) {
- global $wgUser, $wgAdvancedSearchHighlighting;
+ global $wgAdvancedSearchHighlighting;
$this->initText();
- list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs( $wgUser );
+
+ // TODO: make highliter take a content object. Make ContentHandler a factory for SearchHighliter.
+ list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs();
$h = new SearchHighlighter();
- if ( $wgAdvancedSearchHighlighting )
+ if ( $wgAdvancedSearchHighlighting ) {
return $h->highlightText( $this->mText, $terms, $contextlines, $contextchars );
- else
+ } else {
return $h->highlightSimple( $this->mText, $terms, $contextlines, $contextchars );
+ }
}
/**
- * @param $terms Array: terms to highlight
+ * @param array $terms terms to highlight
* @return String: highlighted title, '' if not supported
*/
function getTitleSnippet( $terms ) {
@@ -823,7 +899,7 @@ class SearchResult {
}
/**
- * @param $terms Array: terms to highlight
+ * @param array $terms terms to highlight
* @return String: highlighted redirect name (redirect to this page), '' if none or not supported
*/
function getRedirectSnippet( $terms ) {
@@ -855,10 +931,11 @@ class SearchResult {
* @return String: timestamp
*/
function getTimestamp() {
- if ( $this->mRevision )
+ if ( $this->mRevision ) {
return $this->mRevision->getTimestamp();
- elseif ( $this->mImage )
+ } elseif ( $this->mImage ) {
return $this->mImage->getTimestamp();
+ }
return '';
}
@@ -934,7 +1011,7 @@ class SearchHighlighter {
* Default implementation of wikitext highlighting
*
* @param $text String
- * @param $terms Array: terms to highlight (unescaped)
+ * @param array $terms terms to highlight (unescaped)
* @param $contextlines Integer
* @param $contextchars Integer
* @return String
@@ -944,8 +1021,9 @@ class SearchHighlighter {
global $wgSearchHighlightBoundaries;
$fname = __METHOD__;
- if ( $text == '' )
+ if ( $text == '' ) {
return '';
+ }
// spli text into text + templates/links/tables
$spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
@@ -962,7 +1040,7 @@ class SearchHighlighter {
}
$spat .= '/';
$textExt = array(); // text extracts
- $otherExt = array(); // other extracts
+ $otherExt = array(); // other extracts
wfProfileIn( "$fname-split" );
$start = 0;
$textLen = strlen( $text );
@@ -976,8 +1054,9 @@ class SearchHighlighter {
if ( $key == 2 ) {
// see if this is an image link
$ns = substr( $val[0], 2, - 1 );
- if ( $wgContLang->getNsIndex( $ns ) != NS_FILE )
+ if ( $wgContLang->getNsIndex( $ns ) != NS_FILE ) {
break;
+ }
}
$epat = $endPatterns[$key];
@@ -998,7 +1077,7 @@ class SearchHighlighter {
$len = strlen( $endMatches[2][0] );
$off = $endMatches[2][1];
$this->splitAndAdd( $otherExt, $count,
- substr( $text, $start, $off + $len - $start ) );
+ substr( $text, $start, $off + $len - $start ) );
$start = $off + $len;
$found = true;
break;
@@ -1111,7 +1190,7 @@ class SearchHighlighter {
// if begin of the article contains the whole phrase, show only that !!
if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] )
&& $offsets[$first] < $contextchars * 2 ) {
- $snippets = array ( $first => $snippets[$first] );
+ $snippets = array( $first => $snippets[$first] );
}
// calc by how much to extend existing snippets
@@ -1131,8 +1210,8 @@ class SearchHighlighter {
// add more lines
$add = $index + 1;
while ( $len < $targetchars - 20
- && array_key_exists( $add, $all )
- && !array_key_exists( $add, $snippets ) ) {
+ && array_key_exists( $add, $all )
+ && !array_key_exists( $add, $snippets ) ) {
$offsets[$add] = 0;
$tt = "\n" . $this->extract( $all[$add], 0, $targetchars - $len, $offsets[$add] );
$extended[$add] = $tt;
@@ -1142,22 +1221,24 @@ class SearchHighlighter {
}
}
- // $snippets = array_map('htmlspecialchars', $extended);
+ // $snippets = array_map( 'htmlspecialchars', $extended );
$snippets = $extended;
$last = - 1;
$extract = '';
foreach ( $snippets as $index => $line ) {
- if ( $last == - 1 )
+ if ( $last == - 1 ) {
$extract .= $line; // first line
- elseif ( $last + 1 == $index && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] ) )
+ } elseif ( $last + 1 == $index && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] ) ) {
$extract .= " " . $line; // continous lines
- else
+ } else {
$extract .= '<b> ... </b>' . $line;
+ }
$last = $index;
}
- if ( $extract )
+ if ( $extract ) {
$extract .= '<b> ... </b>';
+ }
$processed = array();
foreach ( $terms as $term ) {
@@ -1177,7 +1258,7 @@ class SearchHighlighter {
/**
* Split text into lines and add it to extracts array
*
- * @param $extracts Array: index -> $line
+ * @param array $extracts index -> $line
* @param $count Integer
* @param $text String
*/
@@ -1185,8 +1266,9 @@ class SearchHighlighter {
$split = explode( "\n", $this->mCleanWikitext ? $this->removeWiki( $text ) : $text );
foreach ( $split as $line ) {
$tt = trim( $line );
- if ( $tt )
+ if ( $tt ) {
$extracts[$count++] = $tt;
+ }
}
}
@@ -1232,7 +1314,7 @@ class SearchHighlighter {
$posEnd = $end;
}
- if ( $end > $start ) {
+ if ( $end > $start ) {
return substr( $text, $start, $end - $start );
} else {
return '';
@@ -1260,8 +1342,9 @@ class SearchHighlighter {
while ( $char >= 0x80 && $char < 0xc0 ) {
// skip trailing bytes
$point++;
- if ( $point >= strlen( $text ) )
+ if ( $point >= strlen( $text ) ) {
return strlen( $text );
+ }
$char = ord( $text[$point] );
}
return $point;
@@ -1272,33 +1355,37 @@ class SearchHighlighter {
/**
* Search extracts for a pattern, and return snippets
*
- * @param $pattern String: regexp for matching lines
- * @param $extracts Array: extracts to search
+ * @param string $pattern regexp for matching lines
+ * @param array $extracts extracts to search
* @param $linesleft Integer: number of extracts to make
* @param $contextchars Integer: length of snippet
- * @param $out Array: map for highlighted snippets
- * @param $offsets Array: map of starting points of snippets
+ * @param array $out map for highlighted snippets
+ * @param array $offsets map of starting points of snippets
* @protected
*/
function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ) {
- if ( $linesleft == 0 )
+ if ( $linesleft == 0 ) {
return; // nothing to do
+ }
foreach ( $extracts as $index => $line ) {
- if ( array_key_exists( $index, $out ) )
+ if ( array_key_exists( $index, $out ) ) {
continue; // this line already highlighted
+ }
$m = array();
- if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) )
+ if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) ) {
continue;
+ }
$offset = $m[0][1];
$len = strlen( $m[0][0] );
- if ( $offset + $len < $contextchars )
+ if ( $offset + $len < $contextchars ) {
$begin = 0;
- elseif ( $len > $contextchars )
+ } elseif ( $len > $contextchars ) {
$begin = $offset;
- else
+ } else {
$begin = $offset + intval( ( $len - $contextchars ) / 2 );
+ }
$end = $begin + $contextchars;
@@ -1307,8 +1394,9 @@ class SearchHighlighter {
$out[$index] = $this->extract( $line, $begin, $end, $posBegin );
$offsets[$index] = $posBegin;
$linesleft--;
- if ( $linesleft == 0 )
+ if ( $linesleft == 0 ) {
return;
+ }
}
}
@@ -1321,12 +1409,12 @@ class SearchHighlighter {
$fname = __METHOD__;
wfProfileIn( $fname );
- // $text = preg_replace("/'{2,5}/", "", $text);
- // $text = preg_replace("/\[[a-z]+:\/\/[^ ]+ ([^]]+)\]/", "\\2", $text);
- // $text = preg_replace("/\[\[([^]|]+)\]\]/", "\\1", $text);
- // $text = preg_replace("/\[\[([^]]+\|)?([^|]]+)\]\]/", "\\2", $text);
- // $text = preg_replace("/\\{\\|(.*?)\\|\\}/", "", $text);
- // $text = preg_replace("/\\[\\[[A-Za-z_-]+:([^|]+?)\\]\\]/", "", $text);
+ // $text = preg_replace( "/'{2,5}/", "", $text );
+ // $text = preg_replace( "/\[[a-z]+:\/\/[^ ]+ ([^]]+)\]/", "\\2", $text );
+ // $text = preg_replace( "/\[\[([^]|]+)\]\]/", "\\1", $text );
+ // $text = preg_replace( "/\[\[([^]]+\|)?([^|]]+)\]\]/", "\\2", $text );
+ // $text = preg_replace( "/\\{\\|(.*?)\\|\\}/", "", $text );
+ // $text = preg_replace( "/\\[\\[[A-Za-z_-]+:([^|]+?)\\]\\]/", "", $text );
$text = preg_replace( "/\\{\\{([^|]+?)\\}\\}/", "", $text );
$text = preg_replace( "/\\{\\{([^|]+\\|)(.*?)\\}\\}/", "\\2", $text );
$text = preg_replace( "/\\[\\[([^|]+?)\\]\\]/", "\\1", $text );
@@ -1349,16 +1437,17 @@ class SearchHighlighter {
*/
function linkReplace( $matches ) {
$colon = strpos( $matches[1], ':' );
- if ( $colon === false )
+ if ( $colon === false ) {
return $matches[2]; // replace with caption
+ }
global $wgContLang;
$ns = substr( $matches[1], 0, $colon );
$index = $wgContLang->getNsIndex( $ns );
- if ( $index !== false && ( $index == NS_FILE || $index == NS_CATEGORY ) )
+ if ( $index !== false && ( $index == NS_FILE || $index == NS_CATEGORY ) ) {
return $matches[0]; // return the whole thing
- else
+ } else {
return $matches[2];
-
+ }
}
/**
@@ -1408,8 +1497,7 @@ class SearchHighlighter {
$line = htmlspecialchars( $pre . $found . $post );
$pat2 = '/(' . $terms . ")/i";
- $line = preg_replace( $pat2,
- "<span class='searchmatch'>\\1</span>", $line );
+ $line = preg_replace( $pat2, "<span class='searchmatch'>\\1</span>", $line );
$extract .= "${line}\n";
}