diff options
Diffstat (limited to 'includes/search')
-rw-r--r-- | includes/search/SearchEngine.php | 169 | ||||
-rw-r--r-- | includes/search/SearchMssql.php | 6 | ||||
-rw-r--r-- | includes/search/SearchMySQL.php | 66 | ||||
-rw-r--r-- | includes/search/SearchOracle.php | 86 | ||||
-rw-r--r-- | includes/search/SearchPostgres.php | 43 | ||||
-rw-r--r-- | includes/search/SearchSqlite.php | 39 | ||||
-rw-r--r-- | includes/search/SearchUpdate.php | 123 |
7 files changed, 342 insertions, 190 deletions
diff --git a/includes/search/SearchEngine.php b/includes/search/SearchEngine.php index 6b3e62b1..71c05d8b 100644 --- a/includes/search/SearchEngine.php +++ b/includes/search/SearchEngine.php @@ -59,7 +59,7 @@ class SearchEngine { * STUB * * @param string $term raw search term - * @return SearchResultSet + * @return SearchResultSet|Status|null */ function searchText( $term ) { return null; @@ -71,7 +71,7 @@ class SearchEngine { * STUB * * @param string $term raw search term - * @return SearchResultSet + * @return SearchResultSet|null */ function searchTitle( $term ) { return null; @@ -93,8 +93,9 @@ class SearchEngine { * @return Boolean */ public function supports( $feature ) { - switch( $feature ) { + switch ( $feature ) { case 'list-redirects': + case 'search-update': return true; case 'title-suffix-filter': default: @@ -331,8 +332,9 @@ class SearchEngine { $parsed = substr( $query, strlen( $prefix ) + 1 ); } } - if ( trim( $parsed ) == '' ) + if ( trim( $parsed ) == '' ) { $parsed = $query; // prefix was the whole query + } wfRunHooks( 'SearchEngineReplacePrefixesComplete', array( $this, $query, &$parsed ) ); @@ -420,8 +422,9 @@ class SearchEngine { $formatted = array_map( array( $wgContLang, 'getFormattedNsText' ), $namespaces ); foreach ( $formatted as $key => $ns ) { - if ( empty( $ns ) ) + if ( empty( $ns ) ) { $formatted[$key] = wfMessage( 'blanknamespace' )->text(); + } } return $formatted; } @@ -451,23 +454,46 @@ class SearchEngine { * Load up the appropriate search engine class for the currently * active database backend, and return a configured instance. * + * @param String $type Type of search backend, if not the default * @return SearchEngine */ - public static function create() { + public static function create( $type = null ) { global $wgSearchType; $dbr = null; - if ( $wgSearchType ) { + + $alternatives = self::getSearchTypes(); + + if ( $type && in_array( $type, $alternatives ) ) { + $class = $type; + } elseif ( $wgSearchType !== null ) { $class = $wgSearchType; } else { $dbr = wfGetDB( DB_SLAVE ); $class = $dbr->getSearchEngine(); } + $search = new $class( $dbr ); $search->setLimitOffset( 0, 0 ); return $search; } /** + * Return the search engines we support. If only $wgSearchType + * is set, it'll be an array of just that one item. + * + * @return array + */ + public static function getSearchTypes() { + global $wgSearchType, $wgSearchTypeAlternatives; + static $alternatives = null; + if ( $alternatives === null ) { + $alternatives = $wgSearchTypeAlternatives ?: array(); + array_unshift( $alternatives, $wgSearchType ); + } + return $alternatives; + } + + /** * Create or update the search index record for the given page. * Title and text should be pre-processed. * STUB @@ -493,6 +519,18 @@ class SearchEngine { } /** + * Delete an indexed page + * Title should be pre-processed. + * STUB + * + * @param Integer $id Page id that was deleted + * @param String $title Title of page that was deleted + */ + function delete( $id, $title ) { + // no-op + } + + /** * Get OpenSearch suggestion template * * @return String @@ -509,6 +547,31 @@ class SearchEngine { return $wgCanonicalServer . wfScript( 'api' ) . '?action=opensearch&search={searchTerms}&namespace=' . $ns; } } + + /** + * Get the raw text for updating the index from a content object + * Nicer search backends could possibly do something cooler than + * just returning raw text + * + * @todo This isn't ideal, we'd really like to have content-specific handling here + * @param Title $t Title we're indexing + * @param Content $c Content of the page to index + * @return string + */ + public function getTextFromContent( Title $t, Content $c = null ) { + return $c ? $c->getTextForSearchIndex() : ''; + } + + /** + * If an implementation of SearchEngine handles all of its own text processing + * in getTextFromContent() and doesn't require SearchUpdate::updateText()'s + * rather silly handling, it should return true here instead. + * + * @return bool + */ + public function textAlreadyUpdatedForIndex() { + return false; + } } /** @@ -641,26 +704,30 @@ class SqlSearchResultSet extends SearchResultSet { } function numRows() { - if ( $this->mResultSet === false ) + if ( $this->mResultSet === false ) { return false; + } return $this->mResultSet->numRows(); } function next() { - if ( $this->mResultSet === false ) + if ( $this->mResultSet === false ) { return false; + } $row = $this->mResultSet->fetchObject(); - if ( $row === false ) + if ( $row === false ) { return false; + } return SearchResult::newFromRow( $row ); } function free() { - if ( $this->mResultSet === false ) + if ( $this->mResultSet === false ) { return false; + } $this->mResultSet->free(); } @@ -750,8 +817,9 @@ class SearchResult { wfRunHooks( 'SearchResultInitFromTitle', array( $title, &$id ) ); $this->mRevision = Revision::newFromTitle( $this->mTitle, $id, Revision::READ_NORMAL ); - if ( $this->mTitle->getNamespace() === NS_FILE ) + if ( $this->mTitle->getNamespace() === NS_FILE ) { $this->mImage = wfFindFile( $this->mTitle ); + } } } @@ -761,8 +829,9 @@ class SearchResult { * @return Boolean */ function isBrokenTitle() { - if ( is_null( $this->mTitle ) ) + if ( is_null( $this->mTitle ) ) { return true; + } return false; } @@ -795,10 +864,8 @@ class SearchResult { protected function initText() { if ( !isset( $this->mText ) ) { if ( $this->mRevision != null ) { - //TODO: if we could plug in some code that knows about special content models *and* about - // special features of the search engine, the search could benefit. - $content = $this->mRevision->getContent(); - $this->mText = $content ? $content->getTextForSearchIndex() : ''; + $this->mText = SearchEngine::create() + ->getTextFromContent( $this->mTitle, $this->mRevision->getContent() ); } else { // TODO: can we fetch raw wikitext for commons images? $this->mText = ''; } @@ -810,16 +877,17 @@ class SearchResult { * @return String: highlighted text snippet, null (and not '') if not supported */ function getTextSnippet( $terms ) { - global $wgUser, $wgAdvancedSearchHighlighting; + global $wgAdvancedSearchHighlighting; $this->initText(); // TODO: make highliter take a content object. Make ContentHandler a factory for SearchHighliter. - list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs( $wgUser ); + list( $contextlines, $contextchars ) = SearchEngine::userHighlightPrefs(); $h = new SearchHighlighter(); - if ( $wgAdvancedSearchHighlighting ) + if ( $wgAdvancedSearchHighlighting ) { return $h->highlightText( $this->mText, $terms, $contextlines, $contextchars ); - else + } else { return $h->highlightSimple( $this->mText, $terms, $contextlines, $contextchars ); + } } /** @@ -863,10 +931,11 @@ class SearchResult { * @return String: timestamp */ function getTimestamp() { - if ( $this->mRevision ) + if ( $this->mRevision ) { return $this->mRevision->getTimestamp(); - elseif ( $this->mImage ) + } elseif ( $this->mImage ) { return $this->mImage->getTimestamp(); + } return ''; } @@ -952,8 +1021,9 @@ class SearchHighlighter { global $wgSearchHighlightBoundaries; $fname = __METHOD__; - if ( $text == '' ) + if ( $text == '' ) { return ''; + } // spli text into text + templates/links/tables $spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)"; @@ -984,8 +1054,9 @@ class SearchHighlighter { if ( $key == 2 ) { // see if this is an image link $ns = substr( $val[0], 2, - 1 ); - if ( $wgContLang->getNsIndex( $ns ) != NS_FILE ) + if ( $wgContLang->getNsIndex( $ns ) != NS_FILE ) { break; + } } $epat = $endPatterns[$key]; @@ -1006,7 +1077,7 @@ class SearchHighlighter { $len = strlen( $endMatches[2][0] ); $off = $endMatches[2][1]; $this->splitAndAdd( $otherExt, $count, - substr( $text, $start, $off + $len - $start ) ); + substr( $text, $start, $off + $len - $start ) ); $start = $off + $len; $found = true; break; @@ -1119,7 +1190,7 @@ class SearchHighlighter { // if begin of the article contains the whole phrase, show only that !! if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] ) && $offsets[$first] < $contextchars * 2 ) { - $snippets = array ( $first => $snippets[$first] ); + $snippets = array( $first => $snippets[$first] ); } // calc by how much to extend existing snippets @@ -1155,17 +1226,19 @@ class SearchHighlighter { $last = - 1; $extract = ''; foreach ( $snippets as $index => $line ) { - if ( $last == - 1 ) + if ( $last == - 1 ) { $extract .= $line; // first line - elseif ( $last + 1 == $index && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] ) ) + } elseif ( $last + 1 == $index && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] ) ) { $extract .= " " . $line; // continous lines - else + } else { $extract .= '<b> ... </b>' . $line; + } $last = $index; } - if ( $extract ) + if ( $extract ) { $extract .= '<b> ... </b>'; + } $processed = array(); foreach ( $terms as $term ) { @@ -1193,8 +1266,9 @@ class SearchHighlighter { $split = explode( "\n", $this->mCleanWikitext ? $this->removeWiki( $text ) : $text ); foreach ( $split as $line ) { $tt = trim( $line ); - if ( $tt ) + if ( $tt ) { $extracts[$count++] = $tt; + } } } @@ -1268,8 +1342,9 @@ class SearchHighlighter { while ( $char >= 0x80 && $char < 0xc0 ) { // skip trailing bytes $point++; - if ( $point >= strlen( $text ) ) + if ( $point >= strlen( $text ) ) { return strlen( $text ); + } $char = ord( $text[$point] ); } return $point; @@ -1289,24 +1364,28 @@ class SearchHighlighter { * @protected */ function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ) { - if ( $linesleft == 0 ) + if ( $linesleft == 0 ) { return; // nothing to do + } foreach ( $extracts as $index => $line ) { - if ( array_key_exists( $index, $out ) ) + if ( array_key_exists( $index, $out ) ) { continue; // this line already highlighted + } $m = array(); - if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) ) + if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) ) { continue; + } $offset = $m[0][1]; $len = strlen( $m[0][0] ); - if ( $offset + $len < $contextchars ) + if ( $offset + $len < $contextchars ) { $begin = 0; - elseif ( $len > $contextchars ) + } elseif ( $len > $contextchars ) { $begin = $offset; - else + } else { $begin = $offset + intval( ( $len - $contextchars ) / 2 ); + } $end = $begin + $contextchars; @@ -1315,8 +1394,9 @@ class SearchHighlighter { $out[$index] = $this->extract( $line, $begin, $end, $posBegin ); $offsets[$index] = $posBegin; $linesleft--; - if ( $linesleft == 0 ) + if ( $linesleft == 0 ) { return; + } } } @@ -1357,16 +1437,17 @@ class SearchHighlighter { */ function linkReplace( $matches ) { $colon = strpos( $matches[1], ':' ); - if ( $colon === false ) + if ( $colon === false ) { return $matches[2]; // replace with caption + } global $wgContLang; $ns = substr( $matches[1], 0, $colon ); $index = $wgContLang->getNsIndex( $ns ); - if ( $index !== false && ( $index == NS_FILE || $index == NS_CATEGORY ) ) + if ( $index !== false && ( $index == NS_FILE || $index == NS_CATEGORY ) ) { return $matches[0]; // return the whole thing - else + } else { return $matches[2]; - + } } /** diff --git a/includes/search/SearchMssql.php b/includes/search/SearchMssql.php index 163d9dc3..cbc1a7a7 100644 --- a/includes/search/SearchMssql.php +++ b/includes/search/SearchMssql.php @@ -170,8 +170,9 @@ class SearchMssql extends SearchEngine { if ( !empty( $terms[3] ) ) { $regexp = preg_quote( $terms[3], '/' ); - if ( $terms[4] ) + if ( $terms[4] ) { $regexp .= "[0-9A-Za-z_]+"; + } } else { $regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' ); } @@ -247,8 +248,9 @@ class MssqlSearchResultSet extends SearchResultSet { function next() { $row = $this->mResultSet->fetchObject(); - if ( $row === false ) + if ( $row === false ) { return false; + } return new SearchResult( $row ); } } diff --git a/includes/search/SearchMySQL.php b/includes/search/SearchMySQL.php index 4a501fd0..b2bc1c26 100644 --- a/includes/search/SearchMySQL.php +++ b/includes/search/SearchMySQL.php @@ -57,12 +57,12 @@ class SearchMySQL extends SearchEngine { # @todo FIXME: This doesn't handle parenthetical expressions. $m = array(); - if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/', + if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/', $filteredText, $m, PREG_SET_ORDER ) ) { - foreach( $m as $bits ) { + foreach ( $m as $bits ) { @list( /* all */, $modifier, $term, $nonQuoted, $wildcard ) = $bits; - if( $nonQuoted != '' ) { + if ( $nonQuoted != '' ) { $term = $nonQuoted; $quote = ''; } else { @@ -70,8 +70,10 @@ class SearchMySQL extends SearchEngine { $quote = '"'; } - if( $searchon !== '' ) $searchon .= ' '; - if( $this->strictMatching && ($modifier == '') ) { + if ( $searchon !== '' ) { + $searchon .= ' '; + } + if ( $this->strictMatching && ( $modifier == '' ) ) { // If we leave this out, boolean op defaults to OR which is rarely helpful. $modifier = '+'; } @@ -79,7 +81,7 @@ class SearchMySQL extends SearchEngine { // Some languages such as Serbian store the input form in the search index, // so we may need to search for matches in multiple writing system variants. $convertedVariants = $wgContLang->autoConvertToAllVariants( $term ); - if( is_array( $convertedVariants ) ) { + if ( is_array( $convertedVariants ) ) { $variants = array_unique( array_values( $convertedVariants ) ); } else { $variants = array( $term ); @@ -99,11 +101,12 @@ class SearchMySQL extends SearchEngine { $strippedVariants = array_unique( $strippedVariants ); $searchon .= $modifier; - if( count( $strippedVariants) > 1 ) + if ( count( $strippedVariants ) > 1 ) { $searchon .= '('; - foreach( $strippedVariants as $stripped ) { + } + foreach ( $strippedVariants as $stripped ) { $stripped = $this->normalizeText( $stripped ); - if( $nonQuoted && strpos( $stripped, ' ' ) !== false ) { + if ( $nonQuoted && strpos( $stripped, ' ' ) !== false ) { // Hack for Chinese: we need to toss in quotes for // multiple-character phrases since normalizeForSearch() // added spaces between them to make word breaks. @@ -111,8 +114,9 @@ class SearchMySQL extends SearchEngine { } $searchon .= "$quote$stripped$quote$wildcard "; } - if( count( $strippedVariants) > 1 ) + if ( count( $strippedVariants ) > 1 ) { $searchon .= ')'; + } // Match individual terms or quoted phrase in result highlighting... // Note that variants will be introduced in a later stage for highlighting! @@ -134,8 +138,8 @@ class SearchMySQL extends SearchEngine { global $wgContLang; $regex = preg_quote( $string, '/' ); - if( $wgContLang->hasWordBreaks() ) { - if( $wildcard ) { + if ( $wgContLang->hasWordBreaks() ) { + if ( $wildcard ) { // Don't cut off the final bit! $regex = "\b$regex"; } else { @@ -177,7 +181,9 @@ class SearchMySQL extends SearchEngine { global $wgCountTotalSearchHits; // This seems out of place, why is this called with empty term? - if ( trim( $term ) === '' ) return null; + if ( trim( $term ) === '' ) { + return null; + } $filteredTerm = $this->filter( $term ); $query = $this->getQuery( $filteredTerm, $fulltext ); @@ -187,7 +193,7 @@ class SearchMySQL extends SearchEngine { ); $total = null; - if( $wgCountTotalSearchHits ) { + if ( $wgCountTotalSearchHits ) { $query = $this->getCountQuery( $filteredTerm, $fulltext ); $totalResult = $this->db->select( $query['tables'], $query['fields'], $query['conds'], @@ -195,7 +201,7 @@ class SearchMySQL extends SearchEngine { ); $row = $totalResult->fetchObject(); - if( $row ) { + if ( $row ) { $total = intval( $row->c ); } $totalResult->free(); @@ -205,12 +211,11 @@ class SearchMySQL extends SearchEngine { } public function supports( $feature ) { - switch( $feature ) { - case 'list-redirects': + switch ( $feature ) { case 'title-suffix-filter': return true; default: - return false; + return parent::supports( $feature ); } } @@ -223,7 +228,7 @@ class SearchMySQL extends SearchEngine { foreach ( $this->features as $feature => $value ) { if ( $feature === 'list-redirects' && !$value ) { $query['conds']['page_is_redirect'] = 0; - } elseif( $feature === 'title-suffix-filter' && $value ) { + } elseif ( $feature === 'title-suffix-filter' && $value ) { $query['conds'][] = 'page_title' . $this->db->buildLike( $this->db->anyString(), $value ); } } @@ -358,12 +363,25 @@ class SearchMySQL extends SearchEngine { $dbw->update( 'searchindex', array( 'si_title' => $this->normalizeText( $title ) ), - array( 'si_page' => $id ), + array( 'si_page' => $id ), __METHOD__, array( $dbw->lowPriorityOption() ) ); } /** + * Delete an indexed page + * Title should be pre-processed. + * + * @param Integer $id Page id that was deleted + * @param String $title Title of page that was deleted + */ + function delete( $id, $title ) { + $dbw = wfGetDB( DB_MASTER ); + + $dbw->delete( 'searchindex', array( 'si_page' => $id ), __METHOD__ ); + } + + /** * Converts some characters for MySQL's indexing to grok it correctly, * and pads short words to overcome limitations. * @return mixed|string @@ -386,7 +404,7 @@ class SearchMySQL extends SearchEngine { // ignores short words... Pad them so we can pass them // through without reconfiguring the server... $minLength = $this->minSearchLength(); - if( $minLength > 1 ) { + if ( $minLength > 1 ) { $n = $minLength - 1; $out = preg_replace( "/\b(\w{1,$n})\b/", @@ -427,7 +445,7 @@ class SearchMySQL extends SearchEngine { * @return int */ protected function minSearchLength() { - if( is_null( self::$mMinSearchLength ) ) { + if ( is_null( self::$mMinSearchLength ) ) { $sql = "SHOW GLOBAL VARIABLES LIKE 'ft\\_min\\_word\\_len'"; $dbr = wfGetDB( DB_SLAVE ); @@ -435,7 +453,7 @@ class SearchMySQL extends SearchEngine { $row = $result->fetchObject(); $result->free(); - if( $row && $row->Variable_name == 'ft_min_word_len' ) { + if ( $row && $row->Variable_name == 'ft_min_word_len' ) { self::$mMinSearchLength = intval( $row->Value ); } else { self::$mMinSearchLength = 0; @@ -449,7 +467,7 @@ class SearchMySQL extends SearchEngine { * @ingroup Search */ class MySQLSearchResultSet extends SqlSearchResultSet { - function __construct( $resultSet, $terms, $totalHits=null ) { + function __construct( $resultSet, $terms, $totalHits = null ) { parent::__construct( $resultSet, $terms ); $this->mTotalHits = $totalHits; } diff --git a/includes/search/SearchOracle.php b/includes/search/SearchOracle.php index b0ea97fe..a8479654 100644 --- a/includes/search/SearchOracle.php +++ b/includes/search/SearchOracle.php @@ -30,32 +30,34 @@ */ class SearchOracle extends SearchEngine { - private $reservedWords = array ('ABOUT' => 1, - 'ACCUM' => 1, - 'AND' => 1, - 'BT' => 1, - 'BTG' => 1, - 'BTI' => 1, - 'BTP' => 1, - 'FUZZY' => 1, - 'HASPATH' => 1, - 'INPATH' => 1, - 'MINUS' => 1, - 'NEAR' => 1, - 'NOT' => 1, - 'NT' => 1, - 'NTG' => 1, - 'NTI' => 1, - 'NTP' => 1, - 'OR' => 1, - 'PT' => 1, - 'RT' => 1, - 'SQE' => 1, - 'SYN' => 1, - 'TR' => 1, - 'TRSYN' => 1, - 'TT' => 1, - 'WITHIN' => 1); + private $reservedWords = array( + 'ABOUT' => 1, + 'ACCUM' => 1, + 'AND' => 1, + 'BT' => 1, + 'BTG' => 1, + 'BTI' => 1, + 'BTP' => 1, + 'FUZZY' => 1, + 'HASPATH' => 1, + 'INPATH' => 1, + 'MINUS' => 1, + 'NEAR' => 1, + 'NOT' => 1, + 'NT' => 1, + 'NTG' => 1, + 'NTI' => 1, + 'NTP' => 1, + 'OR' => 1, + 'PT' => 1, + 'RT' => 1, + 'SQE' => 1, + 'SYN' => 1, + 'TR' => 1, + 'TRSYN' => 1, + 'TT' => 1, + 'WITHIN' => 1, + ); /** * Creates an instance of this class @@ -72,8 +74,9 @@ class SearchOracle extends SearchEngine { * @return SqlSearchResultSet */ function searchText( $term ) { - if ( $term == '' ) + if ( $term == '' ) { return new SqlSearchResultSet( false, '' ); + } $resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), true ) ) ); return new SqlSearchResultSet( $resultSet, $this->searchTerms ); @@ -86,8 +89,9 @@ class SearchOracle extends SearchEngine { * @return SqlSearchResultSet */ function searchTitle( $term ) { - if ( $term == '' ) + if ( $term == '' ) { return new SqlSearchResultSet( false, '' ); + } $resultSet = $this->db->resultObject( $this->db->query( $this->getQuery( $this->filter( $term ), false ) ) ); return new MySQLSearchResultSet( $resultSet, $this->searchTerms ); @@ -110,8 +114,9 @@ class SearchOracle extends SearchEngine { * @return String */ function queryNamespaces() { - if( is_null( $this->namespaces ) ) + if ( is_null( $this->namespaces ) ) { return ''; + } if ( !count( $this->namespaces ) ) { $namespaces = '0'; } else { @@ -195,23 +200,24 @@ class SearchOracle extends SearchEngine { $searchon = ''; if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/', $filteredText, $m, PREG_SET_ORDER ) ) { - foreach( $m as $terms ) { + foreach ( $m as $terms ) { // Search terms in all variant forms, only // apply on wiki with LanguageConverter $temp_terms = $wgContLang->autoConvertToAllVariants( $terms[2] ); - if( is_array( $temp_terms )) { - $temp_terms = array_unique( array_values( $temp_terms )); - foreach( $temp_terms as $t ) { - $searchon .= ($terms[1] == '-' ? ' ~' : ' & ') . $this->escapeTerm( $t ); + if ( is_array( $temp_terms ) ) { + $temp_terms = array_unique( array_values( $temp_terms ) ); + foreach ( $temp_terms as $t ) { + $searchon .= ( $terms[1] == '-' ? ' ~' : ' & ' ) . $this->escapeTerm( $t ); } } else { - $searchon .= ($terms[1] == '-' ? ' ~' : ' & ') . $this->escapeTerm( $terms[2] ); + $searchon .= ( $terms[1] == '-' ? ' ~' : ' & ' ) . $this->escapeTerm( $terms[2] ); } if ( !empty( $terms[3] ) ) { $regexp = preg_quote( $terms[3], '/' ); - if ( $terms[4] ) + if ( $terms[4] ) { $regexp .= "[0-9A-Za-z_]+"; + } } else { $regexp = preg_quote( str_replace( '"', '', $terms[2] ), '/' ); } @@ -227,9 +233,9 @@ class SearchOracle extends SearchEngine { private function escapeTerm( $t ) { global $wgContLang; $t = $wgContLang->normalizeForSearch( $t ); - $t = isset( $this->reservedWords[strtoupper( $t )] ) ? '{'.$t.'}' : $t; - $t = preg_replace('/^"(.*)"$/', '($1)', $t); - $t = preg_replace('/([-&|])/', '\\\\$1', $t); + $t = isset( $this->reservedWords[strtoupper( $t )] ) ? '{' . $t . '}' : $t; + $t = preg_replace( '/^"(.*)"$/', '($1)', $t ); + $t = preg_replace( '/([-&|])/', '\\\\$1', $t ); return $t; } /** @@ -273,7 +279,7 @@ class SearchOracle extends SearchEngine { $dbw->update( 'searchindex', array( 'si_title' => $title ), - array( 'si_page' => $id ), + array( 'si_page' => $id ), 'SearchOracle::updateTitle', array() ); } diff --git a/includes/search/SearchPostgres.php b/includes/search/SearchPostgres.php index 56464e98..7f19ed13 100644 --- a/includes/search/SearchPostgres.php +++ b/includes/search/SearchPostgres.php @@ -64,7 +64,7 @@ class SearchPostgres extends SearchEngine { function searchText( $term ) { $q = $this->searchQuery( $term, 'textvector', 'old_text' ); - $olderror = error_reporting(E_ERROR); + $olderror = error_reporting( E_ERROR ); $resultSet = $this->db->resultObject( $this->db->query( $q, 'SearchPostgres', true ) ); error_reporting( $olderror ); if ( !$resultSet ) { @@ -86,19 +86,19 @@ class SearchPostgres extends SearchEngine { wfDebug( "parseQuery received: $term \n" ); ## No backslashes allowed - $term = preg_replace('/\\\/', '', $term); + $term = preg_replace( '/\\\/', '', $term ); ## Collapse parens into nearby words: - $term = preg_replace('/\s*\(\s*/', ' (', $term); - $term = preg_replace('/\s*\)\s*/', ') ', $term); + $term = preg_replace( '/\s*\(\s*/', ' (', $term ); + $term = preg_replace( '/\s*\)\s*/', ') ', $term ); ## Treat colons as word separators: - $term = preg_replace('/:/', ' ', $term); + $term = preg_replace( '/:/', ' ', $term ); $searchstring = ''; $m = array(); - if( preg_match_all('/([-!]?)(\S+)\s*/', $term, $m, PREG_SET_ORDER ) ) { - foreach( $m as $terms ) { + if ( preg_match_all( '/([-!]?)(\S+)\s*/', $term, $m, PREG_SET_ORDER ) ) { + foreach ( $m as $terms ) { if ( strlen( $terms[1] ) ) { $searchstring .= ' & !'; } @@ -118,19 +118,19 @@ class SearchPostgres extends SearchEngine { } ## Strip out leading junk - $searchstring = preg_replace('/^[\s\&\|]+/', '', $searchstring); + $searchstring = preg_replace( '/^[\s\&\|]+/', '', $searchstring ); ## Remove any doubled-up operators - $searchstring = preg_replace('/([\!\&\|]) +(?:[\&\|] +)+/', "$1 ", $searchstring); + $searchstring = preg_replace( '/([\!\&\|]) +(?:[\&\|] +)+/', "$1 ", $searchstring ); ## Remove any non-spaced operators (e.g. "Zounds!") - $searchstring = preg_replace('/([^ ])[\!\&\|]/', "$1", $searchstring); + $searchstring = preg_replace( '/([^ ])[\!\&\|]/', "$1", $searchstring ); ## Remove any trailing whitespace or operators - $searchstring = preg_replace('/[\s\!\&\|]+$/', '', $searchstring); + $searchstring = preg_replace( '/[\s\!\&\|]+$/', '', $searchstring ); ## Remove unnecessary quotes around everything - $searchstring = preg_replace('/^[\'"](.*)[\'"]$/', "$1", $searchstring); + $searchstring = preg_replace( '/^[\'"](.*)[\'"]$/', "$1", $searchstring ); ## Quote the whole thing $searchstring = $this->db->addQuotes( $searchstring ); @@ -163,30 +163,31 @@ class SearchPostgres extends SearchEngine { $top = $top[0]; if ( $top === "" ) { ## e.g. if only stopwords are used XXX return something better - $query = "SELECT page_id, page_namespace, page_title, 0 AS score ". + $query = "SELECT page_id, page_namespace, page_title, 0 AS score " . "FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " . "AND r.rev_text_id = c.old_id AND 1=0"; } else { $m = array(); - if( preg_match_all("/'([^']+)'/", $top, $m, PREG_SET_ORDER ) ) { - foreach( $m as $terms ) { + if ( preg_match_all( "/'([^']+)'/", $top, $m, PREG_SET_ORDER ) ) { + foreach ( $m as $terms ) { $this->searchTerms[$terms[1]] = $terms[1]; } } - $query = "SELECT page_id, page_namespace, page_title, ". - "ts_rank($fulltext, to_tsquery($searchstring), 5) AS score ". + $query = "SELECT page_id, page_namespace, page_title, " . + "ts_rank($fulltext, to_tsquery($searchstring), 5) AS score " . "FROM page p, revision r, pagecontent c WHERE p.page_latest = r.rev_id " . "AND r.rev_text_id = c.old_id AND $fulltext @@ to_tsquery($searchstring)"; } ## Redirects - if ( !$this->showRedirects ) + if ( !$this->showRedirects ) { $query .= ' AND page_is_redirect = 0'; + } ## Namespaces - defaults to 0 - if( !is_null( $this->namespaces ) ) { // null -> search all + if ( !is_null( $this->namespaces ) ) { // null -> search all if ( count( $this->namespaces ) < 1 ) { $query .= ' AND page_namespace = 0'; } else { @@ -208,7 +209,7 @@ class SearchPostgres extends SearchEngine { function update( $pageid, $title, $text ) { ## We don't want to index older revisions - $SQL = "UPDATE pagecontent SET textvector = NULL WHERE old_id IN ". + $SQL = "UPDATE pagecontent SET textvector = NULL WHERE old_id IN " . "(SELECT rev_text_id FROM revision WHERE rev_page = " . intval( $pageid ) . " ORDER BY rev_text_id DESC OFFSET 1)"; $this->db->query( $SQL ); @@ -244,7 +245,7 @@ class PostgresSearchResultSet extends SqlSearchResultSet { function next() { $row = $this->mResultSet->fetchObject(); - if( $row === false ) { + if ( $row === false ) { return false; } else { return new PostgresSearchResult( $row ); diff --git a/includes/search/SearchSqlite.php b/includes/search/SearchSqlite.php index f3f4788c..554181f6 100644 --- a/includes/search/SearchSqlite.php +++ b/includes/search/SearchSqlite.php @@ -61,12 +61,12 @@ class SearchSqlite extends SearchEngine { $this->searchTerms = array(); $m = array(); - if( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/', + if ( preg_match_all( '/([-+<>~]?)(([' . $lc . ']+)(\*?)|"[^"]*")/', $filteredText, $m, PREG_SET_ORDER ) ) { - foreach( $m as $bits ) { + foreach ( $m as $bits ) { @list( /* all */, $modifier, $term, $nonQuoted, $wildcard ) = $bits; - if( $nonQuoted != '' ) { + if ( $nonQuoted != '' ) { $term = $nonQuoted; $quote = ''; } else { @@ -74,14 +74,14 @@ class SearchSqlite extends SearchEngine { $quote = '"'; } - if( $searchon !== '' ) { + if ( $searchon !== '' ) { $searchon .= ' '; } // Some languages such as Serbian store the input form in the search index, // so we may need to search for matches in multiple writing system variants. $convertedVariants = $wgContLang->autoConvertToAllVariants( $term ); - if( is_array( $convertedVariants ) ) { + if ( is_array( $convertedVariants ) ) { $variants = array_unique( array_values( $convertedVariants ) ); } else { $variants = array( $term ); @@ -101,10 +101,11 @@ class SearchSqlite extends SearchEngine { $strippedVariants = array_unique( $strippedVariants ); $searchon .= $modifier; - if( count( $strippedVariants) > 1 ) + if ( count( $strippedVariants ) > 1 ) { $searchon .= '('; - foreach( $strippedVariants as $stripped ) { - if( $nonQuoted && strpos( $stripped, ' ' ) !== false ) { + } + foreach ( $strippedVariants as $stripped ) { + if ( $nonQuoted && strpos( $stripped, ' ' ) !== false ) { // Hack for Chinese: we need to toss in quotes for // multiple-character phrases since normalizeForSearch() // added spaces between them to make word breaks. @@ -112,8 +113,9 @@ class SearchSqlite extends SearchEngine { } $searchon .= "$quote$stripped$quote$wildcard "; } - if( count( $strippedVariants) > 1 ) + if ( count( $strippedVariants ) > 1 ) { $searchon .= ')'; + } // Match individual terms or quoted phrase in result highlighting... // Note that variants will be introduced in a later stage for highlighting! @@ -134,8 +136,8 @@ class SearchSqlite extends SearchEngine { global $wgContLang; $regex = preg_quote( $string, '/' ); - if( $wgContLang->hasWordBreaks() ) { - if( $wildcard ) { + if ( $wgContLang->hasWordBreaks() ) { + if ( $wildcard ) { // Don't cut off the final bit! $regex = "\b$regex"; } else { @@ -184,10 +186,10 @@ class SearchSqlite extends SearchEngine { $resultSet = $this->db->query( $this->getQuery( $filteredTerm, $fulltext ) ); $total = null; - if( $wgCountTotalSearchHits ) { + if ( $wgCountTotalSearchHits ) { $totalResult = $this->db->query( $this->getCountQuery( $filteredTerm, $fulltext ) ); $row = $totalResult->fetchObject(); - if( $row ) { + if ( $row ) { $total = intval( $row->c ); } $totalResult->free(); @@ -201,7 +203,7 @@ class SearchSqlite extends SearchEngine { * @return String */ function queryRedirect() { - if( $this->showRedirects ) { + if ( $this->showRedirects ) { return ''; } else { return 'AND page_is_redirect=0'; @@ -213,8 +215,9 @@ class SearchSqlite extends SearchEngine { * @return String */ function queryNamespaces() { - if( is_null( $this->namespaces ) ) + if ( is_null( $this->namespaces ) ) { return ''; # search all + } if ( !count( $this->namespaces ) ) { $namespaces = '0'; } else { @@ -295,7 +298,7 @@ class SearchSqlite extends SearchEngine { if ( !$this->fulltextSearchSupported() ) { return; } - // @todo: find a method to do it in a single request, + // @todo find a method to do it in a single request, // couldn't do it so far due to typelessness of FTS3 tables. $dbw = wfGetDB( DB_MASTER ); @@ -324,7 +327,7 @@ class SearchSqlite extends SearchEngine { $dbw->update( 'searchindex', array( 'si_title' => $title ), - array( 'rowid' => $id ), + array( 'rowid' => $id ), __METHOD__ ); } } @@ -333,7 +336,7 @@ class SearchSqlite extends SearchEngine { * @ingroup Search */ class SqliteSearchResultSet extends SqlSearchResultSet { - function __construct( $resultSet, $terms, $totalHits=null ) { + function __construct( $resultSet, $terms, $totalHits = null ) { parent::__construct( $resultSet, $terms ); $this->mTotalHits = $totalHits; } diff --git a/includes/search/SearchUpdate.php b/includes/search/SearchUpdate.php index eabcda3e..82a413e9 100644 --- a/includes/search/SearchUpdate.php +++ b/includes/search/SearchUpdate.php @@ -29,51 +29,108 @@ * @ingroup Search */ class SearchUpdate implements DeferrableUpdate { - - private $mId = 0, $mNamespace, $mTitle, $mText; - private $mTitleWords; - - function __construct( $id, $title, $text = false ) { + /** + * Page id being updated + * @var int + */ + private $id = 0; + + /** + * Title we're updating + * @var Title + */ + private $title; + + /** + * Content of the page (not text) + * @var Content|false + */ + private $content; + + /** + * Constructor + * + * @param int $id Page id to update + * @param Title|string $title Title of page to update + * @param Content|string|false $c Content of the page to update. + * If a Content object, text will be gotten from it. String is for back-compat. + * Passing false tells the backend to just update the title, not the content + */ + public function __construct( $id, $title, $c = false ) { if ( is_string( $title ) ) { $nt = Title::newFromText( $title ); } else { $nt = $title; } - if( $nt ) { - $this->mId = $id; - $this->mText = $text; - - $this->mNamespace = $nt->getNamespace(); - $this->mTitle = $nt->getText(); # Discard namespace - - $this->mTitleWords = $this->mTextWords = array(); + if ( $nt ) { + $this->id = $id; + // is_string() check is back-compat for ApprovedRevs + if ( is_string( $c ) ) { + $this->content = new TextContent( $c ); + } else { + $this->content = $c ?: false; + } + $this->title = $nt; } else { wfDebug( "SearchUpdate object created with invalid title '$title'\n" ); } } - function doUpdate() { - global $wgContLang, $wgDisableSearchUpdate; + /** + * Perform actual update for the entry + */ + public function doUpdate() { + global $wgDisableSearchUpdate; - if( $wgDisableSearchUpdate || !$this->mId ) { + if ( $wgDisableSearchUpdate || !$this->id ) { return; } wfProfileIn( __METHOD__ ); - $search = SearchEngine::create(); - $lc = SearchEngine::legalSearchChars() . '&#;'; + $page = WikiPage::newFromId( $this->id, WikiPage::READ_LATEST ); + $indexTitle = Title::indexTitle( $this->title->getNamespace(), $this->title->getText() ); - if( $this->mText === false ) { - $search->updateTitle($this->mId, - $search->normalizeText( Title::indexTitle( $this->mNamespace, $this->mTitle ) ) ); - wfProfileOut( __METHOD__ ); - return; + foreach ( SearchEngine::getSearchTypes() as $type ) { + $search = SearchEngine::create( $type ); + if ( !$search->supports( 'search-update' ) ) { + continue; + } + + $normalTitle = $search->normalizeText( $indexTitle ); + + if ( $page === null ) { + $search->delete( $this->id, $normalTitle ); + continue; + } elseif ( $this->content === false ) { + $search->updateTitle( $this->id, $normalTitle ); + continue; + } + + $text = $search->getTextFromContent( $this->title, $this->content ); + if ( !$search->textAlreadyUpdatedForIndex() ) { + $text = self::updateText( $text ); + } + + # Perform the actual update + $search->update( $this->id, $normalTitle, $search->normalizeText( $text ) ); } + wfProfileOut( __METHOD__ ); + } + + /** + * Clean text for indexing. Only really suitable for indexing in databases. + * If you're using a real search engine, you'll probably want to override + * this behavior and do something nicer with the original wikitext. + */ + public static function updateText( $text ) { + global $wgContLang; + # Language-specific strip/conversion - $text = $wgContLang->normalizeForSearch( $this->mText ); + $text = $wgContLang->normalizeForSearch( $text ); + $lc = SearchEngine::legalSearchChars() . '&#;'; wfProfileIn( __METHOD__ . '-regexps' ); $text = preg_replace( "/<\\/?\\s*[A-Za-z][^>]*?>/", @@ -123,22 +180,6 @@ class SearchUpdate implements DeferrableUpdate { # Strip wiki '' and ''' $text = preg_replace( "/''[']*/", " ", $text ); wfProfileOut( __METHOD__ . '-regexps' ); - - wfRunHooks( 'SearchUpdate', array( $this->mId, $this->mNamespace, $this->mTitle, &$text ) ); - - # Perform the actual update - $search->update( $this->mId, $search->normalizeText( Title::indexTitle( $this->mNamespace, $this->mTitle ) ), - $search->normalizeText( $text ) ); - - wfProfileOut( __METHOD__ ); + return $text; } } - -/** - * Placeholder class - * - * @ingroup Search - */ -class SearchUpdateMyISAM extends SearchUpdate { - # Inherits everything -} |