diff options
Diffstat (limited to 'includes/search/SearchEngine.php')
-rw-r--r-- | includes/search/SearchEngine.php | 169 |
1 files changed, 125 insertions, 44 deletions
diff --git a/includes/search/SearchEngine.php b/includes/search/SearchEngine.php index 6b3e62b1..71c05d8b 100644 --- a/includes/search/SearchEngine.php +++ b/includes/search/SearchEngine.php @@ -59,7 +59,7 @@ class SearchEngine { * STUB * * @param string $term raw search term - * @return SearchResultSet + * @return SearchResultSet|Status|null */ function searchText( $term ) { return null; @@ -71,7 +71,7 @@ class SearchEngine { * STUB * * @param string $term raw search term - * @return SearchResultSet + * @return SearchResultSet|null */ function searchTitle( $term ) { return null; @@ -93,8 +93,9 @@ class SearchEngine { * @return Boolean */ public function supports( $feature ) { - switch( $feature ) { + switch ( $feature ) { case 'list-redirects': + case 'search-update': return true; case 'title-suffix-filter': default: @@ -331,8 +332,9 @@ class SearchEngine { $parsed = substr( $query, strlen( $prefix ) + 1 ); } } - if ( trim( $parsed ) == '' ) + if ( trim( $parsed ) == '' ) { $parsed = $query; // prefix was the whole query + } wfRunHooks( 'SearchEngineReplacePrefixesComplete', array( $this, $query, &$parsed ) ); @@ -420,8 +422,9 @@ class SearchEngine { $formatted = array_map( array( $wgContLang, 'getFormattedNsText' ), $namespaces ); foreach ( $formatted as $key => $ns ) { - if ( empty( $ns ) ) + if ( empty( $ns ) ) { $formatted[$key] = wfMessage( 'blanknamespace' )->text(); + } } return $formatted; } @@ -451,23 +454,46 @@ class SearchEngine { * Load up the appropriate search engine class for the currently * active database backend, and return a configured instance. * + * @param String $type Type of search backend, if not the default * @return SearchEngine */ - public static function create() { + public static function create( $type = null ) { global $wgSearchType; $dbr = null; - if ( $wgSearchType ) { + + $alternatives = self::getSearchTypes(); + + if ( $type && in_array( $type, $alternatives ) ) { + $class = $type; + } elseif ( $wgSearchType !== null ) { $class = $wgSearchType; } else { $dbr = wfGetDB( DB_SLAVE ); $class = $dbr->getSearchEngine(); } + $search = new $class( $dbr ); $search->setLimitOffset( 0, 0 ); return $search; } /** + * Return the search engines we support. If only $wgSearchType + * is set, it'll be an array of just that one item. + * + * @return array + */ + public static function getSearchTypes() { + global $wgSearchType, $wgSearchTypeAlternatives; + static $alternatives = null; + if ( $alternatives === null ) { + $alternatives = $wgSearchTypeAlternatives ?: array(); + array_unshift( $alternatives, $wgSearchType ); + } + return $alternatives; + } + + /** * Create or update the search index record for the given page. * Title and text should be pre-processed. * STUB @@ -493,6 +519,18 @@ class SearchEngine { } /** + * Delete an indexed page + * Title should be pre-processed. + * STUB + * + * @param Integer $id Page id that was deleted + * @param String $title Title of page that was deleted + */ + function delete( $id, $title ) { + // no-op + } + + /** * Get OpenSearch suggestion template * * @return String @@ -509,6 +547,31 @@ class SearchEngine { return $wgCanonicalServer . wfScript( 'api' ) . '?action=opensearch&search={searchTerms}&namespace=' . $ns; } } + + /** + * Get the raw text for updating the index from a content object + * Nicer search backends could possibly do something cooler than + * just returning raw text + * + * @todo This isn't ideal, we'd really like to have content-specific handling here + * @param Title $t Title we're indexing + * @param Content $c Content of the page to index + * @return string + */ + public function getTextFromContent( Title $t, Content $c = null ) { + return $c ? $c->getTextForSearchIndex() : ''; + } + + /** + * If an implementation of SearchEngine handles all of its own text processing + * in getTextFromContent() and doesn't require SearchUpdate::updateText()'s + * rather silly handling, it should return true here instead. + * + * @return bool + */ + public function textAlreadyUpdatedForIndex() { + return false; + } } /** @@ -641,26 +704,30 @@ class SqlSearchResultSet extends SearchResultSet { } function numRows() { - if ( $this->mResultSet === false ) + if ( $this->mResultSet === false ) { return false; + } return $this->mResultSet->numRows(); } function next() { - if ( $this->mResultSet === false ) + if ( $this->mResultSet === false ) { return false; + } $row = $this->mResultSet->fetchObject(); - if ( $row === false ) + if ( $row === false ) { return false; + } return SearchResult::newFromRow( $row ); } function free() { - if ( $this->mResultSet === false ) + if ( $this->mResultSet === false ) { return false; + } $this->mResultSet->free(); } @@ -750,8 +817,9 @@ class SearchResult { wfRunHooks( 'SearchResultInitFromTitle', array( $title, &$id ) ); $this->mRevision = Revision::newFromTitle( $this->mTitle, $id, Revision::READ_NORMAL ); - if ( $this->mTitle->getNamespace() === NS_FILE ) + if ( $this->mTitle->getNamespace() === NS_FILE ) { $this->mImage = wfFindFile( $this->mTitle ); + } } } @@ -761,8 +829,9 @@ class SearchResult { * @return Boolean */ function isBrokenTitle() { - if ( is_null( $this->mTitle ) ) + if ( is_null( $this->mTitle ) ) { return true; + } return false; } @@ -795,10 +864,8 @@ class SearchResult { protected function initText() { if ( !isset( $this->mText ) ) { if ( $this->mRevision != null ) { - //TODO: if we could plug in some code that knows about special content models *and* about - // special features of the search engine, the search could benefit. - $content = $this->mRevision->getContent(); - $this->mText = $content ? $content->getTextForSearchIndex() : ''; + $this->mText = SearchEngine::create() + ->getTextFromContent( $this->mTitle, $this->mRevision->getContent() ); } else { // TODO: can we fetch raw wikitext for commons images? $this->mText = ''; } @@ -810,16 +877,17 @@ class SearchResult { * @return String: highlighted text snippet, null (and not '') if not supported */ function getTextSnippet( $terms ) { - global $wgUser, $wgAdvancedSearchHighlighting; + global $wgAdvancedSearchHighlighting; $this->initText(); // TODO: make highliter take a content object. Make ContentHandler a factory for SearchHighliter. - list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs( $wgUser ); + list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs(); $h = new SearchHighlighter(); - if ( $wgAdvancedSearchHighlighting ) + if ( $wgAdvancedSearchHighlighting ) { return $h->highlightText( $this->mText, $terms, $contextlines, $contextchars ); - else + } else { return $h->highlightSimple( $this->mText, $terms, $contextlines, $contextchars ); + } } /** @@ -863,10 +931,11 @@ class SearchResult { * @return String: timestamp */ function getTimestamp() { - if ( $this->mRevision ) + if ( $this->mRevision ) { return $this->mRevision->getTimestamp(); - elseif ( $this->mImage ) + } elseif ( $this->mImage ) { return $this->mImage->getTimestamp(); + } return ''; } @@ -952,8 +1021,9 @@ class SearchHighlighter { global $wgSearchHighlightBoundaries; $fname = __METHOD__; - if ( $text == '' ) + if ( $text == '' ) { return ''; + } // spli text into text + templates/links/tables $spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)"; @@ -984,8 +1054,9 @@ class SearchHighlighter { if ( $key == 2 ) { // see if this is an image link $ns = substr( $val[0], 2, - 1 ); - if ( $wgContLang->getNsIndex( $ns ) != NS_FILE ) + if ( $wgContLang->getNsIndex( $ns ) != NS_FILE ) { break; + } } $epat = $endPatterns[$key]; @@ -1006,7 +1077,7 @@ class SearchHighlighter { $len = strlen( $endMatches[2][0] ); $off = $endMatches[2][1]; $this->splitAndAdd( $otherExt, $count, - substr( $text, $start, $off + $len - $start ) ); + substr( $text, $start, $off + $len - $start ) ); $start = $off + $len; $found = true; break; @@ -1119,7 +1190,7 @@ class SearchHighlighter { // if begin of the article contains the whole phrase, show only that !! if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] ) && $offsets[$first] < $contextchars * 2 ) { - $snippets = array ( $first => $snippets[$first] ); + $snippets = array( $first => $snippets[$first] ); } // calc by how much to extend existing snippets @@ -1155,17 +1226,19 @@ class SearchHighlighter { $last = - 1; $extract = ''; foreach ( $snippets as $index => $line ) { - if ( $last == - 1 ) + if ( $last == - 1 ) { $extract .= $line; // first line - elseif ( $last + 1 == $index && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] ) ) + } elseif ( $last + 1 == $index && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] ) ) { $extract .= " " . $line; // continous lines - else + } else { $extract .= '<b> ... </b>' . $line; + } $last = $index; } - if ( $extract ) + if ( $extract ) { $extract .= '<b> ... </b>'; + } $processed = array(); foreach ( $terms as $term ) { @@ -1193,8 +1266,9 @@ class SearchHighlighter { $split = explode( "\n", $this->mCleanWikitext ? $this->removeWiki( $text ) : $text ); foreach ( $split as $line ) { $tt = trim( $line ); - if ( $tt ) + if ( $tt ) { $extracts[$count++] = $tt; + } } } @@ -1268,8 +1342,9 @@ class SearchHighlighter { while ( $char >= 0x80 && $char < 0xc0 ) { // skip trailing bytes $point++; - if ( $point >= strlen( $text ) ) + if ( $point >= strlen( $text ) ) { return strlen( $text ); + } $char = ord( $text[$point] ); } return $point; @@ -1289,24 +1364,28 @@ class SearchHighlighter { * @protected */ function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ) { - if ( $linesleft == 0 ) + if ( $linesleft == 0 ) { return; // nothing to do + } foreach ( $extracts as $index => $line ) { - if ( array_key_exists( $index, $out ) ) + if ( array_key_exists( $index, $out ) ) { continue; // this line already highlighted + } $m = array(); - if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) ) + if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) ) { continue; + } $offset = $m[0][1]; $len = strlen( $m[0][0] ); - if ( $offset + $len < $contextchars ) + if ( $offset + $len < $contextchars ) { $begin = 0; - elseif ( $len > $contextchars ) + } elseif ( $len > $contextchars ) { $begin = $offset; - else + } else { $begin = $offset + intval( ( $len - $contextchars ) / 2 ); + } $end = $begin + $contextchars; @@ -1315,8 +1394,9 @@ class SearchHighlighter { $out[$index] = $this->extract( $line, $begin, $end, $posBegin ); $offsets[$index] = $posBegin; $linesleft--; - if ( $linesleft == 0 ) + if ( $linesleft == 0 ) { return; + } } } @@ -1357,16 +1437,17 @@ class SearchHighlighter { */ function linkReplace( $matches ) { $colon = strpos( $matches[1], ':' ); - if ( $colon === false ) + if ( $colon === false ) { return $matches[2]; // replace with caption + } global $wgContLang; $ns = substr( $matches[1], 0, $colon ); $index = $wgContLang->getNsIndex( $ns ); - if ( $index !== false && ( $index == NS_FILE || $index == NS_CATEGORY ) ) + if ( $index !== false && ( $index == NS_FILE || $index == NS_CATEGORY ) ) { return $matches[0]; // return the whole thing - else + } else { return $matches[2]; - + } } /** |