1 files changed, 166 insertions, 78 deletions
diff --git a/includes/search/SearchEngine.php b/includes/search/SearchEngine.php
index 27a321ac..71c05d8b 100644
--- a/includes/search/SearchEngine.php
+++ b/includes/search/SearchEngine.php
@@ -45,7 +45,7 @@ class SearchEngine {
 	 */
 	protected $db;
 
-	function __construct($db = null) {
+	function __construct( $db = null ) {
 		if ( $db ) {
 			$this->db = $db;
 		} else {
@@ -58,8 +58,8 @@ class SearchEngine {
 	 * If title searches are not supported or disabled, return null.
 	 * STUB
 	 *
-	 * @param $term String: raw search term
-	 * @return SearchResultSet
+	 * @param string $term raw search term
+	 * @return SearchResultSet|Status|null
 	 */
 	function searchText( $term ) {
 		return null;
@@ -70,8 +70,8 @@ class SearchEngine {
 	 * If title searches are not supported or disabled, return null.
 	 * STUB
 	 *
-	 * @param $term String: raw search term
-	 * @return SearchResultSet
+	 * @param string $term raw search term
+	 * @return SearchResultSet|null
 	 */
 	function searchTitle( $term ) {
 		return null;
@@ -93,8 +93,9 @@ class SearchEngine {
 	 * @return Boolean
 	 */
 	public function supports( $feature ) {
-		switch( $feature ) {
+		switch ( $feature ) {
 		case 'list-redirects':
+		case 'search-update':
 			return true;
 		case 'title-suffix-filter':
 		default:
@@ -118,7 +119,7 @@ class SearchEngine {
 	 * on text to be used for searching or updating search index.
 	 * Default implementation does nothing (simply returns $string).
 	 *
-	 * @param $string string: String to process
+	 * @param string $string String to process
 	 * @return string
 	 */
 	public function normalizeText( $string ) {
@@ -163,7 +164,7 @@ class SearchEngine {
 
 	/**
 	 * Really find the title match.
-	 * @return null|\Title
+	 * @return null|Title
 	 */
 	private static function getNearMatchInternal( $searchterm ) {
 		global $wgContLang, $wgEnableSearchContributorsByIP;
@@ -183,10 +184,15 @@ class SearchEngine {
 
 			# Exact match? No need to look further.
 			$title = Title::newFromText( $term );
-			if ( is_null( $title ) ){
+			if ( is_null( $title ) ) {
 				return null;
 			}
 
+			# Try files if searching in the Media: namespace
+			if ( $title->getNamespace() == NS_MEDIA ) {
+				$title = Title::makeTitle( NS_FILE, $title->getText() );
+			}
+
 			if ( $title->isSpecialPage() || $title->isExternal() || $title->exists() ) {
 				return $title;
 			}
@@ -197,22 +203,23 @@ class SearchEngine {
 				return $title;
 			}
 
+			if ( !wfRunHooks( 'SearchAfterNoDirectMatch', array( $term, &$title ) ) ) {
+				return $title;
+			}
+
 			# Now try all lower case (i.e. first letter capitalized)
-			#
 			$title = Title::newFromText( $wgContLang->lc( $term ) );
 			if ( $title && $title->exists() ) {
 				return $title;
 			}
 
 			# Now try capitalized string
-			#
 			$title = Title::newFromText( $wgContLang->ucwords( $term ) );
 			if ( $title && $title->exists() ) {
 				return $title;
 			}
 
 			# Now try all upper case
-			#
 			$title = Title::newFromText( $wgContLang->uc( $term ) );
 			if ( $title && $title->exists() ) {
 				return $title;
@@ -233,7 +240,6 @@ class SearchEngine {
 
 		$title = Title::newFromText( $searchterm );
 
-
 		# Entering an IP address goes to the contributions page
 		if ( $wgEnableSearchContributorsByIP ) {
 			if ( ( $title->getNamespace() == NS_USER && User::isIP( $title->getText() ) )
@@ -242,7 +248,6 @@ class SearchEngine {
 			}
 		}
 
-
 		# Entering a user goes to the user page whether it's there or not
 		if ( $title->getNamespace() == NS_USER ) {
 			return $title;
@@ -327,8 +332,9 @@ class SearchEngine {
 				$parsed = substr( $query, strlen( $prefix ) + 1 );
 			}
 		}
-		if ( trim( $parsed ) == '' )
+		if ( trim( $parsed ) == '' ) {
 			$parsed = $query; // prefix was the whole query
+		}
 
 		wfRunHooks( 'SearchEngineReplacePrefixesComplete', array( $this, $query, &$parsed ) );
 
@@ -416,8 +422,9 @@ class SearchEngine {
 
 		$formatted = array_map( array( $wgContLang, 'getFormattedNsText' ), $namespaces );
 		foreach ( $formatted as $key => $ns ) {
-			if ( empty( $ns ) )
+			if ( empty( $ns ) ) {
 				$formatted[$key] = wfMessage( 'blanknamespace' )->text();
+			}
 		}
 		return $formatted;
 	}
@@ -447,23 +454,46 @@ class SearchEngine {
 	 * Load up the appropriate search engine class for the currently
 	 * active database backend, and return a configured instance.
 	 *
+	 * @param string|null $type Type of search backend, if not the default; ignored unless listed by getSearchTypes()
 	 * @return SearchEngine
 	 */
-	public static function create() {
+	public static function create( $type = null ) {
 		global $wgSearchType;
 		$dbr = null;
-		if ( $wgSearchType ) {
+
+		$alternatives = self::getSearchTypes();
+		// $type names the class instantiated below, so accept only an exact match with a configured backend
+		if ( $type && in_array( $type, $alternatives, true ) ) {
+			$class = $type;
+		} elseif ( $wgSearchType !== null ) {
 			$class = $wgSearchType;
 		} else {
 			$dbr = wfGetDB( DB_SLAVE );
 			$class = $dbr->getSearchEngine();
 		}
+		// $dbr is still null here unless the class was derived from the database connection above
 		$search = new $class( $dbr );
 		$search->setLimitOffset( 0, 0 );
 		return $search;
 	}
 
 	/**
+	 * Return the search engines we support ($wgSearchType first, then $wgSearchTypeAlternatives).
+	 * If only $wgSearchType is set, it'll be an array of just that one item. Built once, then cached.
+	 *
+	 * @return array
+	 */
+	public static function getSearchTypes() {
+		global $wgSearchType, $wgSearchTypeAlternatives;
+		static $alternatives = null;
+		if ( $alternatives === null ) {
+			$alternatives = $wgSearchTypeAlternatives ?: array();
+			array_unshift( $alternatives, $wgSearchType );
+		}
+		return $alternatives;
+	}
+
+	/**
 	 * Create or update the search index record for the given page.
 	 * Title and text should be pre-processed.
 	 * STUB
@@ -489,6 +519,18 @@ class SearchEngine {
 	}
 
 	/**
+	 * Delete an indexed page
+	 * Title should be pre-processed.
+	 * STUB
+	 *
+	 * @param int $id Page id that was deleted
+	 * @param string $title Title of page that was deleted
+	 */
+	function delete( $id, $title ) {
+		// no-op
+	}
+
+	/**
 	 * Get OpenSearch suggestion template
 	 *
 	 * @return String
@@ -505,6 +547,31 @@ class SearchEngine {
 			return $wgCanonicalServer . wfScript( 'api' ) . '?action=opensearch&search={searchTerms}&namespace=' . $ns;
 		}
 	}
+
+	/**
+	 * Get the raw text for updating the index from a content object
+	 * Nicer search backends could possibly do something cooler than
+	 * just returning raw text
+	 *
+	 * @todo This isn't ideal, we'd really like to have content-specific handling here
+	 * @param Title $t Title we're indexing
+	 * @param Content|null $c Content of the page to index; null yields an empty string
+	 * @return string
+	 */
+	public function getTextFromContent( Title $t, Content $c = null ) {
+		return $c ? $c->getTextForSearchIndex() : '';
+	}
+
+	/**
+	 * If an implementation of SearchEngine handles all of its own text processing
+	 * in getTextFromContent() and doesn't require SearchUpdate::updateText()'s
+	 * rather silly handling, it should return true here instead.
+	 *
+	 * @return bool
+	 */
+	public function textAlreadyUpdatedForIndex() {
+		return false;
+	}
 }
 
 /**
@@ -637,26 +704,30 @@ class SqlSearchResultSet extends SearchResultSet {
 	}
 
 	function numRows() {
-		if ( $this->mResultSet === false )
+		if ( $this->mResultSet === false ) {
 			return false;
+		}
 
 		return $this->mResultSet->numRows();
 	}
 
 	function next() {
-		if ( $this->mResultSet === false )
+		if ( $this->mResultSet === false ) {
 			return false;
+		}
 
 		$row = $this->mResultSet->fetchObject();
-		if ( $row === false )
+		if ( $row === false ) {
 			return false;
+		}
 
 		return SearchResult::newFromRow( $row );
 	}
 
 	function free() {
-		if ( $this->mResultSet === false )
+		if ( $this->mResultSet === false ) {
 			return false;
+		}
 
 		$this->mResultSet->free();
 	}
@@ -669,7 +740,6 @@ class SearchResultTooMany {
 	# # Some search engines may bail out if too many matches are found
 }
 
-
 /**
  * @todo FIXME: This class is horribly factored. It would probably be better to
  * have a useful base class to which you pass some standard information, then
@@ -747,8 +817,9 @@ class SearchResult {
 			wfRunHooks( 'SearchResultInitFromTitle', array( $title, &$id ) );
 			$this->mRevision = Revision::newFromTitle(
 				$this->mTitle, $id, Revision::READ_NORMAL );
-			if ( $this->mTitle->getNamespace() === NS_FILE )
+			if ( $this->mTitle->getNamespace() === NS_FILE ) {
 				$this->mImage = wfFindFile( $this->mTitle );
+			}
 		}
 	}
 
@@ -758,8 +829,9 @@ class SearchResult {
 	 * @return Boolean
 	 */
 	function isBrokenTitle() {
-		if ( is_null( $this->mTitle ) )
+		if ( is_null( $this->mTitle ) ) {
 			return true;
+		}
 		return false;
 	}
 
@@ -791,31 +863,35 @@ class SearchResult {
 	 */
 	protected function initText() {
 		if ( !isset( $this->mText ) ) {
-			if ( $this->mRevision != null )
-				$this->mText = $this->mRevision->getText();
-			else // TODO: can we fetch raw wikitext for commons images?
+			if ( $this->mRevision != null ) {
+				$this->mText = SearchEngine::create()
+					->getTextFromContent( $this->mTitle, $this->mRevision->getContent() );
+			} else { // TODO: can we fetch raw wikitext for commons images?
 				$this->mText = '';
-
+			}
 		}
 	}
 
 	/**
-	 * @param $terms Array: terms to highlight
+	 * @param array $terms terms to highlight
 	 * @return String: highlighted text snippet, null (and not '') if not supported
 	 */
 	function getTextSnippet( $terms ) {
-		global $wgUser, $wgAdvancedSearchHighlighting;
+		global $wgAdvancedSearchHighlighting;
 		$this->initText();
-		list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs( $wgUser );
+
+		// TODO: make highlighter take a content object. Make ContentHandler a factory for SearchHighlighter.
+		list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs();
 		$h = new SearchHighlighter();
-		if ( $wgAdvancedSearchHighlighting )
+		if ( $wgAdvancedSearchHighlighting ) {
 			return $h->highlightText( $this->mText, $terms, $contextlines, $contextchars );
-		else
+		} else {
 			return $h->highlightSimple( $this->mText, $terms, $contextlines, $contextchars );
+		}
 	}
 
 	/**
-	 * @param $terms Array: terms to highlight
+	 * @param array $terms terms to highlight
 	 * @return String: highlighted title, '' if not supported
 	 */
 	function getTitleSnippet( $terms ) {
@@ -823,7 +899,7 @@ class SearchResult {
 	}
 
 	/**
-	 * @param $terms Array: terms to highlight
+	 * @param array $terms terms to highlight
 	 * @return String: highlighted redirect name (redirect to this page), '' if none or not supported
 	 */
 	function getRedirectSnippet( $terms ) {
@@ -855,10 +931,11 @@ class SearchResult {
 	 * @return String: timestamp
 	 */
 	function getTimestamp() {
-		if ( $this->mRevision )
+		if ( $this->mRevision ) {
 			return $this->mRevision->getTimestamp();
-		elseif ( $this->mImage )
+		} elseif ( $this->mImage ) {
 			return $this->mImage->getTimestamp();
+		}
 		return '';
 	}
 
@@ -934,7 +1011,7 @@ class SearchHighlighter {
 	 * Default implementation of wikitext highlighting
 	 *
 	 * @param $text String
-	 * @param $terms Array: terms to highlight (unescaped)
+	 * @param array $terms terms to highlight (unescaped)
 	 * @param $contextlines Integer
 	 * @param $contextchars Integer
 	 * @return String
@@ -944,8 +1021,9 @@ class SearchHighlighter {
 		global $wgSearchHighlightBoundaries;
 		$fname = __METHOD__;
 
-		if ( $text == '' )
+		if ( $text == '' ) {
 			return '';
+		}
 
 		// spli text into text + templates/links/tables
 		$spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
@@ -962,7 +1040,7 @@ class SearchHighlighter {
 		}
 		$spat .= '/';
 		$textExt = array(); // text extracts
-		$otherExt = array();  // other extracts
+		$otherExt = array(); // other extracts
 		wfProfileIn( "$fname-split" );
 		$start = 0;
 		$textLen = strlen( $text );
@@ -976,8 +1054,9 @@ class SearchHighlighter {
 						if ( $key == 2 ) {
 							// see if this is an image link
 							$ns = substr( $val[0], 2, - 1 );
-							if ( $wgContLang->getNsIndex( $ns ) != NS_FILE )
+							if ( $wgContLang->getNsIndex( $ns ) != NS_FILE ) {
 								break;
+							}
 
 						}
 						$epat = $endPatterns[$key];
@@ -998,7 +1077,7 @@ class SearchHighlighter {
 								$len = strlen( $endMatches[2][0] );
 								$off = $endMatches[2][1];
 								$this->splitAndAdd( $otherExt, $count,
-									substr( $text, $start, $off + $len  - $start ) );
+									substr( $text, $start, $off + $len - $start ) );
 								$start = $off + $len;
 								$found = true;
 								break;
@@ -1111,7 +1190,7 @@ class SearchHighlighter {
 			// if begin of the article contains the whole phrase, show only that !!
 			if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] )
 				&& $offsets[$first] < $contextchars * 2 ) {
-				$snippets = array ( $first => $snippets[$first] );
+				$snippets = array( $first => $snippets[$first] );
 			}
 
 			// calc by how much to extend existing snippets
@@ -1131,8 +1210,8 @@ class SearchHighlighter {
 				// add more lines
 				$add = $index + 1;
 				while ( $len < $targetchars - 20
-					   && array_key_exists( $add, $all )
-					   && !array_key_exists( $add, $snippets ) ) {
+						&& array_key_exists( $add, $all )
+						&& !array_key_exists( $add, $snippets ) ) {
 					$offsets[$add] = 0;
 					$tt = "\n" . $this->extract( $all[$add], 0, $targetchars - $len, $offsets[$add] );
 					$extended[$add] = $tt;
@@ -1142,22 +1221,24 @@ class SearchHighlighter {
 			}
 		}
 
-		// $snippets = array_map('htmlspecialchars', $extended);
+		// $snippets = array_map( 'htmlspecialchars', $extended );
 		$snippets = $extended;
 		$last = - 1;
 		$extract = '';
 		foreach ( $snippets as $index => $line ) {
-			if ( $last == - 1 )
+			if ( $last == - 1 ) {
 				$extract .= $line; // first line
-			elseif ( $last + 1 == $index && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] ) )
+			} elseif ( $last + 1 == $index && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] ) ) {
 				$extract .= " " . $line; // continous lines
-			else
+			} else {
 				$extract .= '<b> ... </b>' . $line;
+			}
 
 			$last = $index;
 		}
-		if ( $extract )
+		if ( $extract ) {
 			$extract .= '<b> ... </b>';
+		}
 
 		$processed = array();
 		foreach ( $terms as $term ) {
@@ -1177,7 +1258,7 @@ class SearchHighlighter {
 	/**
 	 * Split text into lines and add it to extracts array
 	 *
-	 * @param $extracts Array: index -> $line
+	 * @param array $extracts index -> $line
 	 * @param $count Integer
 	 * @param $text String
 	 */
@@ -1185,8 +1266,9 @@ class SearchHighlighter {
 		$split = explode( "\n", $this->mCleanWikitext ? $this->removeWiki( $text ) : $text );
 		foreach ( $split as $line ) {
 			$tt = trim( $line );
-			if ( $tt )
+			if ( $tt ) {
 				$extracts[$count++] = $tt;
+			}
 		}
 	}
 
@@ -1232,7 +1314,7 @@ class SearchHighlighter {
 			$posEnd = $end;
 		}
 
-		if ( $end > $start )  {
+		if ( $end > $start ) {
 			return substr( $text, $start, $end - $start );
 		} else {
 			return '';
@@ -1260,8 +1342,9 @@ class SearchHighlighter {
 			while ( $char >= 0x80 && $char < 0xc0 ) {
 				// skip trailing bytes
 				$point++;
-				if ( $point >= strlen( $text ) )
+				if ( $point >= strlen( $text ) ) {
 					return strlen( $text );
+				}
 				$char = ord( $text[$point] );
 			}
 			return $point;
@@ -1272,33 +1355,37 @@ class SearchHighlighter {
 	/**
 	 * Search extracts for a pattern, and return snippets
 	 *
-	 * @param $pattern String: regexp for matching lines
-	 * @param $extracts Array: extracts to search
+	 * @param string $pattern regexp for matching lines
+	 * @param array $extracts extracts to search
 	 * @param $linesleft Integer: number of extracts to make
 	 * @param $contextchars Integer: length of snippet
-	 * @param $out Array: map for highlighted snippets
-	 * @param $offsets Array: map of starting points of snippets
+	 * @param array $out map for highlighted snippets
+	 * @param array $offsets map of starting points of snippets
 	 * @protected
 	 */
 	function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ) {
-		if ( $linesleft == 0 )
+		if ( $linesleft == 0 ) {
 			return; // nothing to do
+		}
 		foreach ( $extracts as $index => $line ) {
-			if ( array_key_exists( $index, $out ) )
+			if ( array_key_exists( $index, $out ) ) {
 				continue; // this line already highlighted
+			}
 
 			$m = array();
-			if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) )
+			if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) ) {
 				continue;
+			}
 
 			$offset = $m[0][1];
 			$len = strlen( $m[0][0] );
-			if ( $offset + $len < $contextchars )
+			if ( $offset + $len < $contextchars ) {
 				$begin = 0;
-			elseif ( $len > $contextchars )
+			} elseif ( $len > $contextchars ) {
 				$begin = $offset;
-			else
+			} else {
 				$begin = $offset + intval( ( $len - $contextchars ) / 2 );
+			}
 
 			$end = $begin + $contextchars;
 
@@ -1307,8 +1394,9 @@ class SearchHighlighter {
 			$out[$index] = $this->extract( $line, $begin, $end, $posBegin );
 			$offsets[$index] = $posBegin;
 			$linesleft--;
-			if ( $linesleft == 0 )
+			if ( $linesleft == 0 ) {
 				return;
+			}
 		}
 	}
 
@@ -1321,12 +1409,12 @@ class SearchHighlighter {
 		$fname = __METHOD__;
 		wfProfileIn( $fname );
 
-		// $text = preg_replace("/'{2,5}/", "", $text);
-		// $text = preg_replace("/\[[a-z]+:\/\/[^ ]+ ([^]]+)\]/", "\\2", $text);
-		// $text = preg_replace("/\[\[([^]|]+)\]\]/", "\\1", $text);
-		// $text = preg_replace("/\[\[([^]]+\|)?([^|]]+)\]\]/", "\\2", $text);
-		// $text = preg_replace("/\\{\\|(.*?)\\|\\}/", "", $text);
-		// $text = preg_replace("/\\[\\[[A-Za-z_-]+:([^|]+?)\\]\\]/", "", $text);
+		// $text = preg_replace( "/'{2,5}/", "", $text );
+		// $text = preg_replace( "/\[[a-z]+:\/\/[^ ]+ ([^]]+)\]/", "\\2", $text );
+		// $text = preg_replace( "/\[\[([^]|]+)\]\]/", "\\1", $text );
+		// $text = preg_replace( "/\[\[([^]]+\|)?([^|]]+)\]\]/", "\\2", $text );
+		// $text = preg_replace( "/\\{\\|(.*?)\\|\\}/", "", $text );
+		// $text = preg_replace( "/\\[\\[[A-Za-z_-]+:([^|]+?)\\]\\]/", "", $text );
 		$text = preg_replace( "/\\{\\{([^|]+?)\\}\\}/", "", $text );
 		$text = preg_replace( "/\\{\\{([^|]+\\|)(.*?)\\}\\}/", "\\2", $text );
 		$text = preg_replace( "/\\[\\[([^|]+?)\\]\\]/", "\\1", $text );
@@ -1349,16 +1437,17 @@ class SearchHighlighter {
 	 */
 	function linkReplace( $matches ) {
 		$colon = strpos( $matches[1], ':' );
-		if ( $colon === false )
+		if ( $colon === false ) {
 			return $matches[2]; // replace with caption
+		}
 		global $wgContLang;
 		$ns = substr( $matches[1], 0, $colon );
 		$index = $wgContLang->getNsIndex( $ns );
-		if ( $index !== false && ( $index == NS_FILE || $index == NS_CATEGORY ) )
+		if ( $index !== false && ( $index == NS_FILE || $index == NS_CATEGORY ) ) {
 			return $matches[0]; // return the whole thing
-		else
+		} else {
 			return $matches[2];
-
+		}
 	}
 
 	/**
@@ -1408,8 +1497,7 @@ class SearchHighlighter {
 
 			$line = htmlspecialchars( $pre . $found . $post );
 			$pat2 = '/(' . $terms . ")/i";
-			$line = preg_replace( $pat2,
-			  "<span class='searchmatch'>\\1</span>", $line );
+			$line = preg_replace( $pat2, "<span class='searchmatch'>\\1</span>", $line );
 
 			$extract .= "${line}\n";
 		}