summary | refs | log | tree | commit | diff
path: root/includes/specials/SpecialExport.php
diff options
context:
space:
mode:
Diffstat (limited to 'includes/specials/SpecialExport.php')
-rw-r--r-- includes/specials/SpecialExport.php 551
1 files changed, 326 insertions, 225 deletions
diff --git a/includes/specials/SpecialExport.php b/includes/specials/SpecialExport.php
index 898b5a78..8bf16a71 100644
--- a/includes/specials/SpecialExport.php
+++ b/includes/specials/SpecialExport.php
@@ -21,208 +21,197 @@
* @ingroup SpecialPage
*/
-function wfExportGetPagesFromCategory( $title ) {
- global $wgContLang;
-
- $name = $title->getDBkey();
-
- $dbr = wfGetDB( DB_SLAVE );
-
- list( $page, $categorylinks ) = $dbr->tableNamesN( 'page', 'categorylinks' );
- $sql = "SELECT page_namespace, page_title FROM $page " .
- "JOIN $categorylinks ON cl_from = page_id " .
- "WHERE cl_to = " . $dbr->addQuotes( $name );
-
- $pages = array();
- $res = $dbr->query( $sql, 'wfExportGetPagesFromCategory' );
- while ( $row = $dbr->fetchObject( $res ) ) {
- $n = $row->page_title;
- if ($row->page_namespace) {
- $ns = $wgContLang->getNsText( $row->page_namespace );
- $n = $ns . ':' . $n;
- }
-
- $pages[] = $n;
+class SpecialExport extends SpecialPage {
+
+ private $curonly, $doExport, $pageLinkDepth, $templates;
+ private $images;
+
+ public function __construct() {
+ parent::__construct( 'Export' );
}
- $dbr->freeResult($res);
-
- return $pages;
-}
-
-/**
- * Expand a list of pages to include templates used in those pages.
- * @param $inputPages array, list of titles to look up
- * @param $pageSet array, associative array indexed by titles for output
- * @return array associative array index by titles
- */
-function wfExportGetTemplates( $inputPages, $pageSet ) {
- return wfExportGetLinks( $inputPages, $pageSet,
- 'templatelinks',
- array( 'tl_namespace AS namespace', 'tl_title AS title' ),
- array( 'page_id=tl_from' ) );
-}
-
-/**
- * Expand a list of pages to include images used in those pages.
- * @param $inputPages array, list of titles to look up
- * @param $pageSet array, associative array indexed by titles for output
- * @return array associative array index by titles
- */
-function wfExportGetImages( $inputPages, $pageSet ) {
- return wfExportGetLinks( $inputPages, $pageSet,
- 'imagelinks',
- array( NS_FILE . ' AS namespace', 'il_to AS title' ),
- array( 'page_id=il_from' ) );
-}
-
-/**
- * Expand a list of pages to include items used in those pages.
- * @private
- */
-function wfExportGetLinks( $inputPages, $pageSet, $table, $fields, $join ) {
- $dbr = wfGetDB( DB_SLAVE );
- foreach( $inputPages as $page ) {
- $title = Title::newFromText( $page );
- if( $title ) {
- $pageSet[$title->getPrefixedText()] = true;
- /// @fixme May or may not be more efficient to batch these
- /// by namespace when given multiple input pages.
- $result = $dbr->select(
- array( 'page', $table ),
- $fields,
- array_merge( $join,
- array(
- 'page_namespace' => $title->getNamespace(),
- 'page_title' => $title->getDBKey() ) ),
- __METHOD__ );
- foreach( $result as $row ) {
- $template = Title::makeTitle( $row->namespace, $row->title );
- $pageSet[$template->getPrefixedText()] = true;
+
+ public function execute( $par ) {
+ global $wgOut, $wgRequest, $wgSitename, $wgExportAllowListContributors;
+ global $wgExportAllowHistory, $wgExportMaxHistory, $wgExportMaxLinkDepth;
+ global $wgExportFromNamespaces;
+
+ $this->setHeaders();
+ $this->outputHeader();
+
+ // Set some variables
+ $this->curonly = true;
+ $this->doExport = false;
+ $this->templates = $wgRequest->getCheck( 'templates' );
+ $this->images = $wgRequest->getCheck( 'images' ); // Doesn't do anything yet
+ $this->pageLinkDepth = $this->validateLinkDepth(
+ $wgRequest->getIntOrNull( 'pagelink-depth' ) );
+
+ if ( $wgRequest->getCheck( 'addcat' ) ) {
+ $page = $wgRequest->getText( 'pages' );
+ $catname = $wgRequest->getText( 'catname' );
+
+ if ( $catname !== '' && $catname !== NULL && $catname !== false ) {
+ $t = Title::makeTitleSafe( NS_MAIN, $catname );
+ if ( $t ) {
+ /**
+ * @fixme This can lead to hitting memory limit for very large
+ * categories. Ideally we would do the lookup synchronously
+ * during the export in a single query.
+ */
+ $catpages = $this->getPagesFromCategory( $t );
+ if ( $catpages ) $page .= "\n" . implode( "\n", $catpages );
+ }
}
}
- }
- return $pageSet;
-}
-
-/**
- * Callback function to remove empty strings from the pages array.
- */
-function wfFilterPage( $page ) {
- return $page !== '' && $page !== null;
-}
-
-/**
- *
- */
-function wfSpecialExport( $page = '' ) {
- global $wgOut, $wgRequest, $wgSitename, $wgExportAllowListContributors;
- global $wgExportAllowHistory, $wgExportMaxHistory;
-
- $curonly = true;
- $doexport = false;
-
- if ( $wgRequest->getCheck( 'addcat' ) ) {
- $page = $wgRequest->getText( 'pages' );
- $catname = $wgRequest->getText( 'catname' );
-
- if ( $catname !== '' && $catname !== NULL && $catname !== false ) {
- $t = Title::makeTitleSafe( NS_MAIN, $catname );
- if ( $t ) {
+ else if( $wgRequest->getCheck( 'addns' ) && $wgExportFromNamespaces ) {
+ $page = $wgRequest->getText( 'pages' );
+ $nsindex = $wgRequest->getText( 'nsindex' );
+
+ if ( $nsindex !== '' && $nsindex !== NULL && $nsindex !== false ) {
/**
- * @fixme This can lead to hitting memory limit for very large
- * categories. Ideally we would do the lookup synchronously
- * during the export in a single query.
+ * Same implementation as above, so same @fixme
*/
- $catpages = wfExportGetPagesFromCategory( $t );
- if ( $catpages ) $page .= "\n" . implode( "\n", $catpages );
- }
+ $nspages = $this->getPagesFromNamespace( $nsindex );
+ if ( $nspages ) $page .= "\n" . implode( "\n", $nspages );
+ }
}
- }
- else if( $wgRequest->wasPosted() && $page == '' ) {
- $page = $wgRequest->getText( 'pages' );
- $curonly = $wgRequest->getCheck( 'curonly' );
- $rawOffset = $wgRequest->getVal( 'offset' );
- if( $rawOffset ) {
- $offset = wfTimestamp( TS_MW, $rawOffset );
+ else if( $wgRequest->wasPosted() && $par == '' ) {
+ $page = $wgRequest->getText( 'pages' );
+ $this->curonly = $wgRequest->getCheck( 'curonly' );
+ $rawOffset = $wgRequest->getVal( 'offset' );
+ if( $rawOffset ) {
+ $offset = wfTimestamp( TS_MW, $rawOffset );
+ } else {
+ $offset = null;
+ }
+ $limit = $wgRequest->getInt( 'limit' );
+ $dir = $wgRequest->getVal( 'dir' );
+ $history = array(
+ 'dir' => 'asc',
+ 'offset' => false,
+ 'limit' => $wgExportMaxHistory,
+ );
+ $historyCheck = $wgRequest->getCheck( 'history' );
+ if ( $this->curonly ) {
+ $history = WikiExporter::CURRENT;
+ } elseif ( !$historyCheck ) {
+ if ( $limit > 0 && $limit < $wgExportMaxHistory ) {
+ $history['limit'] = $limit;
+ }
+ if ( !is_null( $offset ) ) {
+ $history['offset'] = $offset;
+ }
+ if ( strtolower( $dir ) == 'desc' ) {
+ $history['dir'] = 'desc';
+ }
+ }
+
+ if( $page != '' ) $this->doExport = true;
} else {
- $offset = null;
+ // Default to current-only for GET requests
+ $page = $wgRequest->getText( 'pages', $par );
+ $historyCheck = $wgRequest->getCheck( 'history' );
+ if( $historyCheck ) {
+ $history = WikiExporter::FULL;
+ } else {
+ $history = WikiExporter::CURRENT;
+ }
+
+ if( $page != '' ) $this->doExport = true;
}
- $limit = $wgRequest->getInt( 'limit' );
- $dir = $wgRequest->getVal( 'dir' );
- $history = array(
- 'dir' => 'asc',
- 'offset' => false,
- 'limit' => $wgExportMaxHistory,
- );
- $historyCheck = $wgRequest->getCheck( 'history' );
- if ( $curonly ) {
+
+ if( !$wgExportAllowHistory ) {
+ // Override
$history = WikiExporter::CURRENT;
- } elseif ( !$historyCheck ) {
- if ( $limit > 0 && $limit < $wgExportMaxHistory ) {
- $history['limit'] = $limit;
- }
- if ( !is_null( $offset ) ) {
- $history['offset'] = $offset;
- }
- if ( strtolower( $dir ) == 'desc' ) {
- $history['dir'] = 'desc';
+ }
+
+ $list_authors = $wgRequest->getCheck( 'listauthors' );
+ if ( !$this->curonly || !$wgExportAllowListContributors ) $list_authors = false ;
+
+ if ( $this->doExport ) {
+ $wgOut->disable();
+ // Cancel output buffering and gzipping if set
+ // This should provide safer streaming for pages with history
+ wfResetOutputBuffers();
+ header( "Content-type: application/xml; charset=utf-8" );
+ if( $wgRequest->getCheck( 'wpDownload' ) ) {
+ // Provide a sane filename suggestion
+ $filename = urlencode( $wgSitename . '-' . wfTimestampNow() . '.xml' );
+ $wgRequest->response()->header( "Content-disposition: attachment;filename={$filename}" );
}
+ $this->doExport( $page, $history, $list_authors );
+ return;
}
-
- if( $page != '' ) $doexport = true;
- } else {
- // Default to current-only for GET requests
- $page = $wgRequest->getText( 'pages', $page );
- $historyCheck = $wgRequest->getCheck( 'history' );
- if( $historyCheck ) {
- $history = WikiExporter::FULL;
+
+ $wgOut->addWikiMsg( 'exporttext' );
+
+ $form = Xml::openElement( 'form', array( 'method' => 'post',
+ 'action' => $this->getTitle()->getLocalUrl( 'action=submit' ) ) );
+ $form .= Xml::inputLabel( wfMsg( 'export-addcattext' ) , 'catname', 'catname', 40 ) . '&nbsp;';
+ $form .= Xml::submitButton( wfMsg( 'export-addcat' ), array( 'name' => 'addcat' ) ) . '<br />';
+
+ if ( $wgExportFromNamespaces ) {
+ $form .= Xml::namespaceSelector( '', null, 'nsindex', wfMsg( 'export-addnstext' ) ) . '&nbsp;';
+ $form .= Xml::submitButton( wfMsg( 'export-addns' ), array( 'name' => 'addns' ) ) . '<br />';
+ }
+
+ $form .= Xml::element( 'textarea', array( 'name' => 'pages', 'cols' => 40, 'rows' => 10 ), $page, false );
+ $form .= '<br />';
+
+ if( $wgExportAllowHistory ) {
+ $form .= Xml::checkLabel( wfMsg( 'exportcuronly' ), 'curonly', 'curonly', true ) . '<br />';
} else {
- $history = WikiExporter::CURRENT;
+ $wgOut->addHTML( wfMsgExt( 'exportnohistory', 'parse' ) );
}
-
- if( $page != '' ) $doexport = true;
+ $form .= Xml::checkLabel( wfMsg( 'export-templates' ), 'templates', 'wpExportTemplates', false ) . '<br />';
+ if( $wgExportMaxLinkDepth || $this->userCanOverrideExportDepth() ) {
+ $form .= Xml::inputLabel( wfMsg( 'export-pagelinks' ), 'pagelink-depth', 'pagelink-depth', 20, 0 ) . '<br />';
+ }
+ // Enable this when we can do something useful exporting/importing image information. :)
+ //$form .= Xml::checkLabel( wfMsg( 'export-images' ), 'images', 'wpExportImages', false ) . '<br />';
+ $form .= Xml::checkLabel( wfMsg( 'export-download' ), 'wpDownload', 'wpDownload', true ) . '<br />';
+
+ $form .= Xml::submitButton( wfMsg( 'export-submit' ), array( 'accesskey' => 's' ) );
+ $form .= Xml::closeElement( 'form' );
+ $wgOut->addHTML( $form );
}
+
+ private function userCanOverrideExportDepth() {
+ global $wgUser;
- if( !$wgExportAllowHistory ) {
- // Override
- $history = WikiExporter::CURRENT;
+ return $wgUser->isAllowed( 'override-export-depth' );
}
-
- $list_authors = $wgRequest->getCheck( 'listauthors' );
- if ( !$curonly || !$wgExportAllowListContributors ) $list_authors = false ;
-
- if ( $doexport ) {
- $wgOut->disable();
-
- // Cancel output buffering and gzipping if set
- // This should provide safer streaming for pages with history
- wfResetOutputBuffers();
- header( "Content-type: application/xml; charset=utf-8" );
- if( $wgRequest->getCheck( 'wpDownload' ) ) {
- // Provide a sane filename suggestion
- $filename = urlencode( $wgSitename . '-' . wfTimestampNow() . '.xml' );
- $wgRequest->response()->header( "Content-disposition: attachment;filename={$filename}" );
- }
-
+
+ /**
+ * Do the actual page exporting
+ * @param string $page User input on what page(s) to export
+ * @param mixed $history one of the WikiExporter history export constants, or an array with 'dir', 'offset' and 'limit' keys
+ */
+ private function doExport( $page, $history, $list_authors ) {
+ global $wgExportMaxHistory;
+
/* Split up the input and look up linked pages */
- $inputPages = array_filter( explode( "\n", $page ), 'wfFilterPage' );
+ $inputPages = array_filter( explode( "\n", $page ), array( $this, 'filterPage' ) );
$pageSet = array_flip( $inputPages );
-
- if( $wgRequest->getCheck( 'templates' ) ) {
- $pageSet = wfExportGetTemplates( $inputPages, $pageSet );
+
+ if( $this->templates ) {
+ $pageSet = $this->getTemplates( $inputPages, $pageSet );
}
-
- /*
- // Enable this when we can do something useful exporting/importing image information. :)
- if( $wgRequest->getCheck( 'images' ) ) {
- $pageSet = wfExportGetImages( $inputPages, $pageSet );
+
+ if( $linkDepth = $this->pageLinkDepth ) {
+ $pageSet = $this->getPageLinks( $inputPages, $pageSet, $linkDepth );
}
- */
-
+
+ /*
+ // Enable this when we can do something useful exporting/importing image information. :)
+ if( $this->images ) {
+ $pageSet = $this->getImages( $inputPages, $pageSet );
+ }
+ */
+
$pages = array_keys( $pageSet );
-
+
/* Ok, let's get to it... */
-
if( $history == WikiExporter::CURRENT ) {
$lb = false;
$db = wfGetDB( DB_SLAVE );
@@ -238,65 +227,177 @@ function wfSpecialExport( $page = '' ) {
set_time_limit(0);
wfRestoreWarnings();
}
-
$exporter = new WikiExporter( $db, $history, $buffer );
- $exporter->list_authors = $list_authors ;
+ $exporter->list_authors = $list_authors;
$exporter->openStream();
-
foreach( $pages as $page ) {
/*
- if( $wgExportMaxHistory && !$curonly ) {
- $title = Title::newFromText( $page );
- if( $title ) {
- $count = Revision::countByTitle( $db, $title );
- if( $count > $wgExportMaxHistory ) {
- wfDebug( __FUNCTION__ .
- ": Skipped $page, $count revisions too big\n" );
- continue;
- }
- }
- }*/
-
+ if( $wgExportMaxHistory && !$this->curonly ) {
+ $title = Title::newFromText( $page );
+ if( $title ) {
+ $count = Revision::countByTitle( $db, $title );
+ if( $count > $wgExportMaxHistory ) {
+ wfDebug( __FUNCTION__ .
+ ": Skipped $page, $count revisions too big\n" );
+ continue;
+ }
+ }
+ }*/
#Bug 8824: Only export pages the user can read
$title = Title::newFromText( $page );
if( is_null( $title ) ) continue; #TODO: perhaps output an <error> tag or something.
if( !$title->userCanRead() ) continue; #TODO: perhaps output an <error> tag or something.
-
+
$exporter->pageByTitle( $title );
}
-
+
$exporter->closeStream();
if( $lb ) {
$lb->closeAll();
}
- return;
}
-
- $self = SpecialPage::getTitleFor( 'Export' );
- $wgOut->addHTML( wfMsgExt( 'exporttext', 'parse' ) );
-
- $form = Xml::openElement( 'form', array( 'method' => 'post',
- 'action' => $self->getLocalUrl( 'action=submit' ) ) );
-
- $form .= Xml::inputLabel( wfMsg( 'export-addcattext' ) , 'catname', 'catname', 40 ) . '&nbsp;';
- $form .= Xml::submitButton( wfMsg( 'export-addcat' ), array( 'name' => 'addcat' ) ) . '<br />';
-
- $form .= Xml::openElement( 'textarea', array( 'name' => 'pages', 'cols' => 40, 'rows' => 10 ) );
- $form .= htmlspecialchars( $page );
- $form .= Xml::closeElement( 'textarea' );
- $form .= '<br />';
-
- if( $wgExportAllowHistory ) {
- $form .= Xml::checkLabel( wfMsg( 'exportcuronly' ), 'curonly', 'curonly', true ) . '<br />';
- } else {
- $wgOut->addHTML( wfMsgExt( 'exportnohistory', 'parse' ) );
+
+
+ private function getPagesFromCategory( $title ) {
+ global $wgContLang;
+
+ $name = $title->getDBkey();
+
+ $dbr = wfGetDB( DB_SLAVE );
+ $res = $dbr->select( array('page', 'categorylinks' ),
+ array( 'page_namespace', 'page_title' ),
+ array('cl_from=page_id', 'cl_to' => $name ),
+ __METHOD__, array('LIMIT' => '5000'));
+
+ $pages = array();
+ while ( $row = $dbr->fetchObject( $res ) ) {
+ $n = $row->page_title;
+ if ($row->page_namespace) {
+ $ns = $wgContLang->getNsText( $row->page_namespace );
+ $n = $ns . ':' . $n;
+ }
+
+ $pages[] = $n;
+ }
+ $dbr->freeResult($res);
+
+ return $pages;
+ }
+
+ private function getPagesFromNamespace( $nsindex ) {
+ global $wgContLang;
+
+ $dbr = wfGetDB( DB_SLAVE );
+ $res = $dbr->select( 'page', array('page_namespace', 'page_title'),
+ array('page_namespace' => $nsindex),
+ __METHOD__, array('LIMIT' => '5000') );
+
+ $pages = array();
+ while ( $row = $dbr->fetchObject( $res ) ) {
+ $n = $row->page_title;
+ if ($row->page_namespace) {
+ $ns = $wgContLang->getNsText( $row->page_namespace );
+ $n = $ns . ':' . $n;
+ }
+
+ $pages[] = $n;
+ }
+ $dbr->freeResult($res);
+
+ return $pages;
+ }
+ /**
+ * Expand a list of pages to include templates used in those pages.
+ * @param $inputPages array, list of titles to look up
+ * @param $pageSet array, associative array indexed by titles for output
+ * @return array associative array indexed by titles
+ */
+ private function getTemplates( $inputPages, $pageSet ) {
+ return $this->getLinks( $inputPages, $pageSet,
+ 'templatelinks',
+ array( 'tl_namespace AS namespace', 'tl_title AS title' ),
+ array( 'page_id=tl_from' ) );
+ }
+
+ /**
+ * Validate link depth setting, if available.
+ */
+ private function validateLinkDepth( $depth ) {
+ global $wgExportMaxLinkDepth, $wgExportMaxLinkDepthLimit;
+ if( $depth < 0 ) {
+ return 0;
+ }
+ if ( !$this->userCanOverrideExportDepth() ) {
+ if( $depth > $wgExportMaxLinkDepth ) {
+ return $wgExportMaxLinkDepth;
+ }
+ }
+ /*
+ * There's a HARD CODED limit of 5 levels of recursion here to prevent a
+ * crazy-big export from being done by someone setting the depth
+ * number too high. In other words, last resort safety net.
+ */
+ return intval( min( $depth, 5 ) );
+ }
+
+ /** Expand a list of pages to include pages linked to from those pages, repeating the lookup $depth times. */
+ private function getPageLinks( $inputPages, $pageSet, $depth ) {
+ for( $depth=$depth; $depth>0; --$depth ) {
+ $pageSet = $this->getLinks( $inputPages, $pageSet, 'pagelinks',
+ array( 'pl_namespace AS namespace', 'pl_title AS title' ),
+ array( 'page_id=pl_from' ) );
+ $inputPages = array_keys( $pageSet );
+ }
+ return $pageSet;
+ }
+
+ /**
+ * Expand a list of pages to include images used in those pages.
+ * @param $inputPages array, list of titles to look up
+ * @param $pageSet array, associative array indexed by titles for output
+ * @return array associative array indexed by titles
+ */
+ private function getImages( $inputPages, $pageSet ) {
+ return $this->getLinks( $inputPages, $pageSet,
+ 'imagelinks',
+ array( NS_FILE . ' AS namespace', 'il_to AS title' ),
+ array( 'page_id=il_from' ) );
+ }
+
+ /**
+ * Expand a list of pages to include items used in those pages.
+ * @private
+ */
+ private function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
+ $dbr = wfGetDB( DB_SLAVE );
+ foreach( $inputPages as $page ) {
+ $title = Title::newFromText( $page );
+ if( $title ) {
+ $pageSet[$title->getPrefixedText()] = true;
+ /// @fixme May or may not be more efficient to batch these
+ /// by namespace when given multiple input pages.
+ $result = $dbr->select(
+ array( 'page', $table ),
+ $fields,
+ array_merge( $join,
+ array(
+ 'page_namespace' => $title->getNamespace(),
+ 'page_title' => $title->getDBKey() ) ),
+ __METHOD__ );
+ foreach( $result as $row ) {
+ $template = Title::makeTitle( $row->namespace, $row->title );
+ $pageSet[$template->getPrefixedText()] = true;
+ }
+ }
+ }
+ return $pageSet;
+ }
+
+ /**
+ * Callback function to remove empty strings from the pages array.
+ */
+ private function filterPage( $page ) {
+ return $page !== '' && $page !== null;
}
- $form .= Xml::checkLabel( wfMsg( 'export-templates' ), 'templates', 'wpExportTemplates', false ) . '<br />';
- // Enable this when we can do something useful exporting/importing image information. :)
- //$form .= Xml::checkLabel( wfMsg( 'export-images' ), 'images', 'wpExportImages', false ) . '<br />';
- $form .= Xml::checkLabel( wfMsg( 'export-download' ), 'wpDownload', 'wpDownload', true ) . '<br />';
-
- $form .= Xml::submitButton( wfMsg( 'export-submit' ), array( 'accesskey' => 's' ) );
- $form .= Xml::closeElement( 'form' );
- $wgOut->addHTML( $form );
}
+