diff options
Diffstat (limited to 'includes/search/SearchEngine.php')
-rw-r--r-- | includes/search/SearchEngine.php | 244 |
1 files changed, 166 insertions, 78 deletions
diff --git a/includes/search/SearchEngine.php b/includes/search/SearchEngine.php index 27a321ac..71c05d8b 100644 --- a/includes/search/SearchEngine.php +++ b/includes/search/SearchEngine.php @@ -45,7 +45,7 @@ class SearchEngine { */ protected $db; - function __construct($db = null) { + function __construct( $db = null ) { if ( $db ) { $this->db = $db; } else { @@ -58,8 +58,8 @@ class SearchEngine { * If title searches are not supported or disabled, return null. * STUB * - * @param $term String: raw search term - * @return SearchResultSet + * @param string $term raw search term + * @return SearchResultSet|Status|null */ function searchText( $term ) { return null; @@ -70,8 +70,8 @@ class SearchEngine { * If title searches are not supported or disabled, return null. * STUB * - * @param $term String: raw search term - * @return SearchResultSet + * @param string $term raw search term + * @return SearchResultSet|null */ function searchTitle( $term ) { return null; @@ -93,8 +93,9 @@ class SearchEngine { * @return Boolean */ public function supports( $feature ) { - switch( $feature ) { + switch ( $feature ) { case 'list-redirects': + case 'search-update': return true; case 'title-suffix-filter': default: @@ -118,7 +119,7 @@ class SearchEngine { * on text to be used for searching or updating search index. * Default implementation does nothing (simply returns $string). * - * @param $string string: String to process + * @param string $string String to process * @return string */ public function normalizeText( $string ) { @@ -163,7 +164,7 @@ class SearchEngine { /** * Really find the title match. - * @return null|\Title + * @return null|Title */ private static function getNearMatchInternal( $searchterm ) { global $wgContLang, $wgEnableSearchContributorsByIP; @@ -183,10 +184,15 @@ class SearchEngine { # Exact match? No need to look further. $title = Title::newFromText( $term ); - if ( is_null( $title ) ){ + if ( is_null( $title ) ) { return null; } + # Try files if searching in the Media: namespace + if ( $title->getNamespace() == NS_MEDIA ) { + $title = Title::makeTitle( NS_FILE, $title->getText() ); + } + if ( $title->isSpecialPage() || $title->isExternal() || $title->exists() ) { return $title; } @@ -197,22 +203,23 @@ class SearchEngine { return $title; } + if ( !wfRunHooks( 'SearchAfterNoDirectMatch', array( $term, &$title ) ) ) { + return $title; + } + # Now try all lower case (i.e. first letter capitalized) - # $title = Title::newFromText( $wgContLang->lc( $term ) ); if ( $title && $title->exists() ) { return $title; } # Now try capitalized string - # $title = Title::newFromText( $wgContLang->ucwords( $term ) ); if ( $title && $title->exists() ) { return $title; } # Now try all upper case - # $title = Title::newFromText( $wgContLang->uc( $term ) ); if ( $title && $title->exists() ) { return $title; @@ -233,7 +240,6 @@ class SearchEngine { $title = Title::newFromText( $searchterm ); - # Entering an IP address goes to the contributions page if ( $wgEnableSearchContributorsByIP ) { if ( ( $title->getNamespace() == NS_USER && User::isIP( $title->getText() ) ) @@ -242,7 +248,6 @@ class SearchEngine { } } - # Entering a user goes to the user page whether it's there or not if ( $title->getNamespace() == NS_USER ) { return $title; @@ -327,8 +332,9 @@ class SearchEngine { $parsed = substr( $query, strlen( $prefix ) + 1 ); } } - if ( trim( $parsed ) == '' ) + if ( trim( $parsed ) == '' ) { $parsed = $query; // prefix was the whole query + } wfRunHooks( 'SearchEngineReplacePrefixesComplete', array( $this, $query, &$parsed ) ); @@ -416,8 +422,9 @@ class SearchEngine { $formatted = array_map( array( $wgContLang, 'getFormattedNsText' ), $namespaces ); foreach ( $formatted as $key => $ns ) { - if ( empty( $ns ) ) + if ( empty( $ns ) ) { $formatted[$key] = wfMessage( 'blanknamespace' )->text(); + } } return $formatted; } @@ -447,23 +454,46 @@ class SearchEngine { * Load up the appropriate search engine class for the currently * active database backend, and return a configured instance. * + * @param String $type Type of search backend, if not the default * @return SearchEngine */ - public static function create() { + public static function create( $type = null ) { global $wgSearchType; $dbr = null; - if ( $wgSearchType ) { + + $alternatives = self::getSearchTypes(); + + if ( $type && in_array( $type, $alternatives ) ) { + $class = $type; + } elseif ( $wgSearchType !== null ) { $class = $wgSearchType; } else { $dbr = wfGetDB( DB_SLAVE ); $class = $dbr->getSearchEngine(); } + $search = new $class( $dbr ); $search->setLimitOffset( 0, 0 ); return $search; } /** + * Return the search engines we support. If only $wgSearchType + * is set, it'll be an array of just that one item. + * + * @return array + */ + public static function getSearchTypes() { + global $wgSearchType, $wgSearchTypeAlternatives; + static $alternatives = null; + if ( $alternatives === null ) { + $alternatives = $wgSearchTypeAlternatives ?: array(); + array_unshift( $alternatives, $wgSearchType ); + } + return $alternatives; + } + + /** * Create or update the search index record for the given page. * Title and text should be pre-processed. * STUB @@ -489,6 +519,18 @@ class SearchEngine { } /** + * Delete an indexed page + * Title should be pre-processed. + * STUB + * + * @param Integer $id Page id that was deleted + * @param String $title Title of page that was deleted + */ + function delete( $id, $title ) { + // no-op + } + + /** * Get OpenSearch suggestion template * * @return String @@ -505,6 +547,31 @@ class SearchEngine { return $wgCanonicalServer . wfScript( 'api' ) . '?action=opensearch&search={searchTerms}&namespace=' . $ns; } } + + /** + * Get the raw text for updating the index from a content object + * Nicer search backends could possibly do something cooler than + * just returning raw text + * + * @todo This isn't ideal, we'd really like to have content-specific handling here + * @param Title $t Title we're indexing + * @param Content $c Content of the page to index + * @return string + */ + public function getTextFromContent( Title $t, Content $c = null ) { + return $c ? $c->getTextForSearchIndex() : ''; + } + + /** + * If an implementation of SearchEngine handles all of its own text processing + * in getTextFromContent() and doesn't require SearchUpdate::updateText()'s + * rather silly handling, it should return true here instead. + * + * @return bool + */ + public function textAlreadyUpdatedForIndex() { + return false; + } } /** @@ -637,26 +704,30 @@ class SqlSearchResultSet extends SearchResultSet { } function numRows() { - if ( $this->mResultSet === false ) + if ( $this->mResultSet === false ) { return false; + } return $this->mResultSet->numRows(); } function next() { - if ( $this->mResultSet === false ) + if ( $this->mResultSet === false ) { return false; + } $row = $this->mResultSet->fetchObject(); - if ( $row === false ) + if ( $row === false ) { return false; + } return SearchResult::newFromRow( $row ); } function free() { - if ( $this->mResultSet === false ) + if ( $this->mResultSet === false ) { return false; + } $this->mResultSet->free(); } @@ -669,7 +740,6 @@ class SearchResultTooMany { # # Some search engines may bail out if too many matches are found } - /** * @todo FIXME: This class is horribly factored. It would probably be better to * have a useful base class to which you pass some standard information, then @@ -747,8 +817,9 @@ class SearchResult { wfRunHooks( 'SearchResultInitFromTitle', array( $title, &$id ) ); $this->mRevision = Revision::newFromTitle( $this->mTitle, $id, Revision::READ_NORMAL ); - if ( $this->mTitle->getNamespace() === NS_FILE ) + if ( $this->mTitle->getNamespace() === NS_FILE ) { $this->mImage = wfFindFile( $this->mTitle ); + } } } @@ -758,8 +829,9 @@ class SearchResult { * @return Boolean */ function isBrokenTitle() { - if ( is_null( $this->mTitle ) ) + if ( is_null( $this->mTitle ) ) { return true; + } return false; } @@ -791,31 +863,35 @@ class SearchResult { */ protected function initText() { if ( !isset( $this->mText ) ) { - if ( $this->mRevision != null ) - $this->mText = $this->mRevision->getText(); - else // TODO: can we fetch raw wikitext for commons images? + if ( $this->mRevision != null ) { + $this->mText = SearchEngine::create() + ->getTextFromContent( $this->mTitle, $this->mRevision->getContent() ); + } else { // TODO: can we fetch raw wikitext for commons images? $this->mText = ''; - + } } } /** - * @param $terms Array: terms to highlight + * @param array $terms terms to highlight * @return String: highlighted text snippet, null (and not '') if not supported */ function getTextSnippet( $terms ) { - global $wgUser, $wgAdvancedSearchHighlighting; + global $wgAdvancedSearchHighlighting; $this->initText(); - list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs( $wgUser ); + + // TODO: make highliter take a content object. Make ContentHandler a factory for SearchHighliter. + list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs(); $h = new SearchHighlighter(); - if ( $wgAdvancedSearchHighlighting ) + if ( $wgAdvancedSearchHighlighting ) { return $h->highlightText( $this->mText, $terms, $contextlines, $contextchars ); - else + } else { return $h->highlightSimple( $this->mText, $terms, $contextlines, $contextchars ); + } } /** - * @param $terms Array: terms to highlight + * @param array $terms terms to highlight * @return String: highlighted title, '' if not supported */ function getTitleSnippet( $terms ) { @@ -823,7 +899,7 @@ class SearchResult { } /** - * @param $terms Array: terms to highlight + * @param array $terms terms to highlight * @return String: highlighted redirect name (redirect to this page), '' if none or not supported */ function getRedirectSnippet( $terms ) { @@ -855,10 +931,11 @@ class SearchResult { * @return String: timestamp */ function getTimestamp() { - if ( $this->mRevision ) + if ( $this->mRevision ) { return $this->mRevision->getTimestamp(); - elseif ( $this->mImage ) + } elseif ( $this->mImage ) { return $this->mImage->getTimestamp(); + } return ''; } @@ -934,7 +1011,7 @@ class SearchHighlighter { * Default implementation of wikitext highlighting * * @param $text String - * @param $terms Array: terms to highlight (unescaped) + * @param array $terms terms to highlight (unescaped) * @param $contextlines Integer * @param $contextchars Integer * @return String @@ -944,8 +1021,9 @@ class SearchHighlighter { global $wgSearchHighlightBoundaries; $fname = __METHOD__; - if ( $text == '' ) + if ( $text == '' ) { return ''; + } // spli text into text + templates/links/tables $spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)"; @@ -962,7 +1040,7 @@ class SearchHighlighter { } $spat .= '/'; $textExt = array(); // text extracts - $otherExt = array(); // other extracts + $otherExt = array(); // other extracts wfProfileIn( "$fname-split" ); $start = 0; $textLen = strlen( $text ); @@ -976,8 +1054,9 @@ class SearchHighlighter { if ( $key == 2 ) { // see if this is an image link $ns = substr( $val[0], 2, - 1 ); - if ( $wgContLang->getNsIndex( $ns ) != NS_FILE ) + if ( $wgContLang->getNsIndex( $ns ) != NS_FILE ) { break; + } } $epat = $endPatterns[$key]; @@ -998,7 +1077,7 @@ class SearchHighlighter { $len = strlen( $endMatches[2][0] ); $off = $endMatches[2][1]; $this->splitAndAdd( $otherExt, $count, - substr( $text, $start, $off + $len - $start ) ); + substr( $text, $start, $off + $len - $start ) ); $start = $off + $len; $found = true; break; @@ -1111,7 +1190,7 @@ class SearchHighlighter { // if begin of the article contains the whole phrase, show only that !! if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] ) && $offsets[$first] < $contextchars * 2 ) { - $snippets = array ( $first => $snippets[$first] ); + $snippets = array( $first => $snippets[$first] ); } // calc by how much to extend existing snippets @@ -1131,8 +1210,8 @@ class SearchHighlighter { // add more lines $add = $index + 1; while ( $len < $targetchars - 20 - && array_key_exists( $add, $all ) - && !array_key_exists( $add, $snippets ) ) { + && array_key_exists( $add, $all ) + && !array_key_exists( $add, $snippets ) ) { $offsets[$add] = 0; $tt = "\n" . $this->extract( $all[$add], 0, $targetchars - $len, $offsets[$add] ); $extended[$add] = $tt; @@ -1142,22 +1221,24 @@ class SearchHighlighter { } } - // $snippets = array_map('htmlspecialchars', $extended); + // $snippets = array_map( 'htmlspecialchars', $extended ); $snippets = $extended; $last = - 1; $extract = ''; foreach ( $snippets as $index => $line ) { - if ( $last == - 1 ) + if ( $last == - 1 ) { $extract .= $line; // first line - elseif ( $last + 1 == $index && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] ) ) + } elseif ( $last + 1 == $index && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] ) ) { $extract .= " " . $line; // continous lines - else + } else { $extract .= '<b> ... </b>' . $line; + } $last = $index; } - if ( $extract ) + if ( $extract ) { $extract .= '<b> ... </b>'; + } $processed = array(); foreach ( $terms as $term ) { @@ -1177,7 +1258,7 @@ class SearchHighlighter { /** * Split text into lines and add it to extracts array * - * @param $extracts Array: index -> $line + * @param array $extracts index -> $line * @param $count Integer * @param $text String */ @@ -1185,8 +1266,9 @@ class SearchHighlighter { $split = explode( "\n", $this->mCleanWikitext ? $this->removeWiki( $text ) : $text ); foreach ( $split as $line ) { $tt = trim( $line ); - if ( $tt ) + if ( $tt ) { $extracts[$count++] = $tt; + } } } @@ -1232,7 +1314,7 @@ class SearchHighlighter { $posEnd = $end; } - if ( $end > $start ) { + if ( $end > $start ) { return substr( $text, $start, $end - $start ); } else { return ''; @@ -1260,8 +1342,9 @@ class SearchHighlighter { while ( $char >= 0x80 && $char < 0xc0 ) { // skip trailing bytes $point++; - if ( $point >= strlen( $text ) ) + if ( $point >= strlen( $text ) ) { return strlen( $text ); + } $char = ord( $text[$point] ); } return $point; @@ -1272,33 +1355,37 @@ class SearchHighlighter { /** * Search extracts for a pattern, and return snippets * - * @param $pattern String: regexp for matching lines - * @param $extracts Array: extracts to search + * @param string $pattern regexp for matching lines + * @param array $extracts extracts to search * @param $linesleft Integer: number of extracts to make * @param $contextchars Integer: length of snippet - * @param $out Array: map for highlighted snippets - * @param $offsets Array: map of starting points of snippets + * @param array $out map for highlighted snippets + * @param array $offsets map of starting points of snippets * @protected */ function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ) { - if ( $linesleft == 0 ) + if ( $linesleft == 0 ) { return; // nothing to do + } foreach ( $extracts as $index => $line ) { - if ( array_key_exists( $index, $out ) ) + if ( array_key_exists( $index, $out ) ) { continue; // this line already highlighted + } $m = array(); - if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) ) + if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) ) { continue; + } $offset = $m[0][1]; $len = strlen( $m[0][0] ); - if ( $offset + $len < $contextchars ) + if ( $offset + $len < $contextchars ) { $begin = 0; - elseif ( $len > $contextchars ) + } elseif ( $len > $contextchars ) { $begin = $offset; - else + } else { $begin = $offset + intval( ( $len - $contextchars ) / 2 ); + } $end = $begin + $contextchars; @@ -1307,8 +1394,9 @@ class SearchHighlighter { $out[$index] = $this->extract( $line, $begin, $end, $posBegin ); $offsets[$index] = $posBegin; $linesleft--; - if ( $linesleft == 0 ) + if ( $linesleft == 0 ) { return; + } } } @@ -1321,12 +1409,12 @@ class SearchHighlighter { $fname = __METHOD__; wfProfileIn( $fname ); - // $text = preg_replace("/'{2,5}/", "", $text); - // $text = preg_replace("/\[[a-z]+:\/\/[^ ]+ ([^]]+)\]/", "\\2", $text); - // $text = preg_replace("/\[\[([^]|]+)\]\]/", "\\1", $text); - // $text = preg_replace("/\[\[([^]]+\|)?([^|]]+)\]\]/", "\\2", $text); - // $text = preg_replace("/\\{\\|(.*?)\\|\\}/", "", $text); - // $text = preg_replace("/\\[\\[[A-Za-z_-]+:([^|]+?)\\]\\]/", "", $text); + // $text = preg_replace( "/'{2,5}/", "", $text ); + // $text = preg_replace( "/\[[a-z]+:\/\/[^ ]+ ([^]]+)\]/", "\\2", $text ); + // $text = preg_replace( "/\[\[([^]|]+)\]\]/", "\\1", $text ); + // $text = preg_replace( "/\[\[([^]]+\|)?([^|]]+)\]\]/", "\\2", $text ); + // $text = preg_replace( "/\\{\\|(.*?)\\|\\}/", "", $text ); + // $text = preg_replace( "/\\[\\[[A-Za-z_-]+:([^|]+?)\\]\\]/", "", $text ); $text = preg_replace( "/\\{\\{([^|]+?)\\}\\}/", "", $text ); $text = preg_replace( "/\\{\\{([^|]+\\|)(.*?)\\}\\}/", "\\2", $text ); $text = preg_replace( "/\\[\\[([^|]+?)\\]\\]/", "\\1", $text ); @@ -1349,16 +1437,17 @@ class SearchHighlighter { */ function linkReplace( $matches ) { $colon = strpos( $matches[1], ':' ); - if ( $colon === false ) + if ( $colon === false ) { return $matches[2]; // replace with caption + } global $wgContLang; $ns = substr( $matches[1], 0, $colon ); $index = $wgContLang->getNsIndex( $ns ); - if ( $index !== false && ( $index == NS_FILE || $index == NS_CATEGORY ) ) + if ( $index !== false && ( $index == NS_FILE || $index == NS_CATEGORY ) ) { return $matches[0]; // return the whole thing - else + } else { return $matches[2]; - + } } /** @@ -1408,8 +1497,7 @@ class SearchHighlighter { $line = htmlspecialchars( $pre . $found . $post ); $pat2 = '/(' . $terms . ")/i"; - $line = preg_replace( $pat2, - "<span class='searchmatch'>\\1</span>", $line ); + $line = preg_replace( $pat2, "<span class='searchmatch'>\\1</span>", $line ); $extract .= "${line}\n"; } |