summaryrefslogtreecommitdiff
path: root/includes/LinksUpdate.php
blob: 9e25bf07d0ae793c07cf71680b28316f741e761b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
<?php
/**
 * See deferred.txt
 * @package MediaWiki
 */

/**
 * @todo document
 * @package MediaWiki
 */
class LinksUpdate {

	/**@{{
	 * @private
	 */
	var $mId,            //!< Page ID of the article linked from
		$mTitle,         //!< Title object of the article linked from
		$mLinks,         //!< Map of title strings to IDs for the links in the document
		$mImages,        //!< DB keys of the images used, in the array key only
		$mTemplates,     //!< Map of title strings to IDs for the template references, including broken ones
		$mExternals,     //!< URLs of external links, array key only
		$mCategories,    //!< Map of category names to sort keys
		$mInterlangs,    //!< Map of language codes to titles
		$mDb,            //!< Database connection reference
		$mOptions,       //!< SELECT options to be used (array)
		$mRecursive;     //!< Whether to queue jobs for recursive updates
	/**@}}*/

	/**
	 * Constructor
	 * Initialize private variables
	 * @param $title Integer: FIXME
	 * @param $parserOutput FIXME
	 * @param $recursive Boolean: FIXME, default 'true'.
	 */
	function LinksUpdate( $title, $parserOutput, $recursive = true ) {
		global $wgAntiLockFlags;

		if ( $wgAntiLockFlags & ALF_NO_LINK_LOCK ) {
			$this->mOptions = array();
		} else {
			$this->mOptions = array( 'FOR UPDATE' );
		}
		$this->mDb =& wfGetDB( DB_MASTER );

		if ( !is_object( $title ) ) {
			throw new MWException( "The calling convention to LinksUpdate::LinksUpdate() has changed. " .
				"Please see Article::editUpdates() for an invocation example.\n" );
		}
		$this->mTitle = $title;
		$this->mId = $title->getArticleID();

		$this->mLinks = $parserOutput->getLinks();
		$this->mImages = $parserOutput->getImages();
		$this->mTemplates = $parserOutput->getTemplates();
		$this->mExternals = $parserOutput->getExternalLinks();
		$this->mCategories = $parserOutput->getCategories();

		# Convert the format of the interlanguage links
		# I didn't want to change it in the ParserOutput, because that array is passed all 
		# the way back to the skin, so either a skin API break would be required, or an 
		# inefficient back-conversion.
		$ill = $parserOutput->getLanguageLinks();
		$this->mInterlangs = array();
		foreach ( $ill as $link ) {
			list( $key, $title ) = explode( ':', $link, 2 );
			$this->mInterlangs[$key] = $title;
		}

		$this->mRecursive = $recursive;
	}

	/**
	 * Update link tables with outgoing links from an updated article
	 */
	function doUpdate() {
		global $wgUseDumbLinkUpdate;
		if ( $wgUseDumbLinkUpdate ) {
			$this->doDumbUpdate();
		} else {
			$this->doIncrementalUpdate();
		}
	}

	function doIncrementalUpdate() {
		$fname = 'LinksUpdate::doIncrementalUpdate';
		wfProfileIn( $fname );
		
		# Page links
		$existing = $this->getExistingLinks();
		$this->incrTableUpdate( 'pagelinks', 'pl', $this->getLinkDeletions( $existing ),
			$this->getLinkInsertions( $existing ) );

		# Image links
		$existing = $this->getExistingImages();
		$this->incrTableUpdate( 'imagelinks', 'il', $this->getImageDeletions( $existing ),
			$this->getImageInsertions( $existing ) );

		# Invalidate all image description pages which had links added or removed
		$imageUpdates = array_diff_key( $existing, $this->mImages ) + array_diff_key( $this->mImages, $existing );
		$this->invalidateImageDescriptions( $imageUpdates );

		# External links
		$existing = $this->getExistingExternals();
		$this->incrTableUpdate( 'externallinks', 'el', $this->getExternalDeletions( $existing ),
	        $this->getExternalInsertions( $existing ) );

		# Language links
		$existing = $this->getExistingInterlangs();
		$this->incrTableUpdate( 'langlinks', 'll', $this->getInterlangDeletions( $existing ),
			$this->getInterlangInsertions( $existing ) );

		# Template links
		$existing = $this->getExistingTemplates();
		$this->incrTableUpdate( 'templatelinks', 'tl', $this->getTemplateDeletions( $existing ),
			$this->getTemplateInsertions( $existing ) );

		# Category links
		$existing = $this->getExistingCategories();
		$this->incrTableUpdate( 'categorylinks', 'cl', $this->getCategoryDeletions( $existing ),
			$this->getCategoryInsertions( $existing ) );

		# Invalidate all categories which were added, deleted or changed (set symmetric difference)
		$categoryUpdates = array_diff_assoc( $existing, $this->mCategories ) + array_diff_assoc( $this->mCategories, $existing );
		$this->invalidateCategories( $categoryUpdates );

		# Refresh links of all pages including this page
		# This will be in a separate transaction
		if ( $this->mRecursive ) {
			$this->queueRecursiveJobs();
		}
		
		wfProfileOut( $fname );
	}

	/**
	 * Link update which clears the previous entries and inserts new ones
	 * May be slower or faster depending on level of lock contention and write speed of DB
	 * Also useful where link table corruption needs to be repaired, e.g. in refreshLinks.php
	 */
	function doDumbUpdate() {
		$fname = 'LinksUpdate::doDumbUpdate';
		wfProfileIn( $fname );

		# Refresh category pages and image description pages
		$existing = $this->getExistingCategories();
		$categoryUpdates = array_diff_assoc( $existing, $this->mCategories ) + array_diff_assoc( $this->mCategories, $existing );
		$existing = $this->getExistingImages();
		$imageUpdates = array_diff_key( $existing, $this->mImages ) + array_diff_key( $this->mImages, $existing );

		$this->dumbTableUpdate( 'pagelinks',     $this->getLinkInsertions(),     'pl_from' );
		$this->dumbTableUpdate( 'imagelinks',    $this->getImageInsertions(),    'il_from' );
		$this->dumbTableUpdate( 'categorylinks', $this->getCategoryInsertions(), 'cl_from' );
		$this->dumbTableUpdate( 'templatelinks', $this->getTemplateInsertions(), 'tl_from' );
		$this->dumbTableUpdate( 'externallinks', $this->getExternalInsertions(), 'el_from' );
		$this->dumbTableUpdate( 'langlinks',     $this->getInterlangInsertions(), 'll_from' );

		# Update the cache of all the category pages and image description pages which were changed
		$this->invalidateCategories( $categoryUpdates );
		$this->invalidateImageDescriptions( $imageUpdates );

		# Refresh links of all pages including this page
		# This will be in a separate transaction
		if ( $this->mRecursive ) {
			$this->queueRecursiveJobs();
		}

		wfProfileOut( $fname );
	}

	function queueRecursiveJobs() {
		wfProfileIn( __METHOD__ );
		
		$batchSize = 100;
		$dbr =& wfGetDB( DB_SLAVE );
		$res = $dbr->select( array( 'templatelinks', 'page' ), 
			array( 'page_namespace', 'page_title' ),
			array( 
				'page_id=tl_from', 
				'tl_namespace' => $this->mTitle->getNamespace(),
				'tl_title' => $this->mTitle->getDBkey()
			), __METHOD__
		);

		$done = false;
		while ( !$done ) {
			$jobs = array();
			for ( $i = 0; $i < $batchSize; $i++ ) {
				$row = $dbr->fetchObject( $res );
				if ( !$row ) {
					$done = true;
					break;
				}
				$title = Title::makeTitle( $row->page_namespace, $row->page_title );
				$jobs[] = Job::factory( 'refreshLinks', $title );
			}
			Job::batchInsert( $jobs );
		}
		$dbr->freeResult( $res );
		wfProfileOut( __METHOD__ );
	}
	
	/**
	 * Invalidate the cache of a list of pages from a single namespace
	 *
	 * @param integer $namespace
	 * @param array $dbkeys
	 */
	function invalidatePages( $namespace, $dbkeys ) {
		$fname = 'LinksUpdate::invalidatePages';
		
		if ( !count( $dbkeys ) ) {
			return;
		}
		
		/**
		 * Determine which pages need to be updated
		 * This is necessary to prevent the job queue from smashing the DB with
		 * large numbers of concurrent invalidations of the same page
		 */
		$now = $this->mDb->timestamp();
		$ids = array();
		$res = $this->mDb->select( 'page', array( 'page_id' ), 
			array( 
				'page_namespace' => $namespace,
				'page_title IN (' . $this->mDb->makeList( $dbkeys ) . ')',
				'page_touched < ' . $this->mDb->addQuotes( $now )
			), $fname
		);
		while ( $row = $this->mDb->fetchObject( $res ) ) {
			$ids[] = $row->page_id;
		}
		if ( !count( $ids ) ) {
			return;
		}
		
		/**
		 * Do the update
		 * We still need the page_touched condition, in case the row has changed since 
		 * the non-locking select above.
		 */
		$this->mDb->update( 'page', array( 'page_touched' => $now ), 
			array( 
				'page_id IN (' . $this->mDb->makeList( $ids ) . ')',
				'page_touched < ' . $this->mDb->addQuotes( $now )
			), $fname
		);
	}

	function invalidateCategories( $cats ) {
		$this->invalidatePages( NS_CATEGORY, array_keys( $cats ) );
	}

	function invalidateImageDescriptions( $images ) {
		$this->invalidatePages( NS_IMAGE, array_keys( $images ) );
	}

	function dumbTableUpdate( $table, $insertions, $fromField ) {
		$fname = 'LinksUpdate::dumbTableUpdate';
		$this->mDb->delete( $table, array( $fromField => $this->mId ), $fname );
		if ( count( $insertions ) ) {
			# The link array was constructed without FOR UPDATE, so there may be collisions
			# This may cause minor link table inconsistencies, which is better than
			# crippling the site with lock contention.
			$this->mDb->insert( $table, $insertions, $fname, array( 'IGNORE' ) );
		}
	}

	/**
	 * Make a WHERE clause from a 2-d NS/dbkey array
	 *
	 * @param array $arr 2-d array indexed by namespace and DB key
	 * @param string $prefix Field name prefix, without the underscore
	 */
	function makeWhereFrom2d( &$arr, $prefix ) {
		$lb = new LinkBatch;
		$lb->setArray( $arr );
		return $lb->constructSet( $prefix, $this->mDb );
	}

	/**
	 * Update a table by doing a delete query then an insert query
	 * @private
	 */
	function incrTableUpdate( $table, $prefix, $deletions, $insertions ) {
		$fname = 'LinksUpdate::incrTableUpdate';
		$where = array( "{$prefix}_from" => $this->mId );
		if ( $table == 'pagelinks' || $table == 'templatelinks' ) {
			$clause = $this->makeWhereFrom2d( $deletions, $prefix );
			if ( $clause ) {
				$where[] = $clause;
			} else {
				$where = false;
			}
		} else {
			if ( $table == 'langlinks' ) {
				$toField = 'll_lang';
			} else {
				$toField = $prefix . '_to';
			}
			if ( count( $deletions ) ) {
				$where[] = "$toField IN (" . $this->mDb->makeList( array_keys( $deletions ) ) . ')';
			} else {
				$where = false;
			}
		}
		if ( $where ) {
			$this->mDb->delete( $table, $where, $fname );
		}
		if ( count( $insertions ) ) {
			$this->mDb->insert( $table, $insertions, $fname, 'IGNORE' );
		}
	}


	/**
	 * Get an array of pagelinks insertions for passing to the DB
	 * Skips the titles specified by the 2-D array $existing
	 * @private
	 */
	function getLinkInsertions( $existing = array() ) {
		$arr = array();
		foreach( $this->mLinks as $ns => $dbkeys ) {
			# array_diff_key() was introduced in PHP 5.1, there is a compatibility function
			# in GlobalFunctions.php
			$diffs = isset( $existing[$ns] ) ? array_diff_key( $dbkeys, $existing[$ns] ) : $dbkeys;
			foreach ( $diffs as $dbk => $id ) {
				$arr[] = array(
					'pl_from'      => $this->mId,
					'pl_namespace' => $ns,
					'pl_title'     => $dbk
				);
			}
		}
		return $arr;
	}

	/**
	 * Get an array of template insertions. Like getLinkInsertions()
	 * @private
	 */
	function getTemplateInsertions( $existing = array() ) {
		$arr = array();
		foreach( $this->mTemplates as $ns => $dbkeys ) {
			$diffs = isset( $existing[$ns] ) ? array_diff_key( $dbkeys, $existing[$ns] ) : $dbkeys;
			foreach ( $diffs as $dbk => $id ) {
				$arr[] = array(
					'tl_from'      => $this->mId,
					'tl_namespace' => $ns,
					'tl_title'     => $dbk
				);
			}
		}
		return $arr;
	}

	/**
	 * Get an array of image insertions
	 * Skips the names specified in $existing
	 * @private
	 */
	function getImageInsertions( $existing = array() ) {
		$arr = array();
		$diffs = array_diff_key( $this->mImages, $existing );
		foreach( $diffs as $iname => $dummy ) {
			$arr[] = array(
				'il_from' => $this->mId,
				'il_to'   => $iname
			);
		}
		return $arr;
	}

	/**
	 * Get an array of externallinks insertions. Skips the names specified in $existing
	 * @private
	 */
	function getExternalInsertions( $existing = array() ) {
		$arr = array();
		$diffs = array_diff_key( $this->mExternals, $existing );
		foreach( $diffs as $url => $dummy ) {
			$arr[] = array(
				'el_from'   => $this->mId,
				'el_to'     => $url,
				'el_index'  => wfMakeUrlIndex( $url ),
			);
		}
		return $arr;
	}

	/**
	 * Get an array of category insertions
	 * @param array $existing Array mapping existing category names to sort keys. If both
	 * match a link in $this, the link will be omitted from the output
	 * @private
	 */
	function getCategoryInsertions( $existing = array() ) {
		$diffs = array_diff_assoc( $this->mCategories, $existing );
		$arr = array();
		foreach ( $diffs as $name => $sortkey ) {
			$arr[] = array(
				'cl_from'    => $this->mId,
				'cl_to'      => $name,
				'cl_sortkey' => $sortkey,
				'cl_timestamp' => $this->mDb->timestamp()
			);
		}
		return $arr;
	}

	/**
	 * Get an array of interlanguage link insertions
	 * @param array $existing Array mapping existing language codes to titles	 
	 * @private
	 */
	function getInterlangInsertions( $existing = array() ) {
	    $diffs = array_diff_assoc( $this->mInterlangs, $existing );
	    $arr = array();
	    foreach( $diffs as $lang => $title ) {
	        $arr[] = array(
	            'll_from'  => $this->mId,
	            'll_lang'  => $lang,
	            'll_title' => $title
	        );
	    }
	    return $arr;
	}

	/**
	 * Given an array of existing links, returns those links which are not in $this
	 * and thus should be deleted.
	 * @private
	 */
	function getLinkDeletions( $existing ) {
		$del = array();
		foreach ( $existing as $ns => $dbkeys ) {
			if ( isset( $this->mLinks[$ns] ) ) {
				$del[$ns] = array_diff_key( $existing[$ns], $this->mLinks[$ns] );
			} else {
				$del[$ns] = $existing[$ns];
			}
		}
		return $del;
	}

	/**
	 * Given an array of existing templates, returns those templates which are not in $this
	 * and thus should be deleted.
	 * @private
	 */
	function getTemplateDeletions( $existing ) {
		$del = array();
		foreach ( $existing as $ns => $dbkeys ) {
			if ( isset( $this->mTemplates[$ns] ) ) {
				$del[$ns] = array_diff_key( $existing[$ns], $this->mTemplates[$ns] );
			} else {
				$del[$ns] = $existing[$ns];
			}
		}
		return $del;
	}

	/**
	 * Given an array of existing images, returns those images which are not in $this
	 * and thus should be deleted.
	 * @private
	 */
	function getImageDeletions( $existing ) {
		return array_diff_key( $existing, $this->mImages );
	}

	/** 
	 * Given an array of existing external links, returns those links which are not
	 * in $this and thus should be deleted.
	 * @private
	 */
	function getExternalDeletions( $existing ) {
		return array_diff_key( $existing, $this->mExternals );
	}

	/**
	 * Given an array of existing categories, returns those categories which are not in $this
	 * and thus should be deleted.
	 * @private
	 */
	function getCategoryDeletions( $existing ) {
		return array_diff_assoc( $existing, $this->mCategories );
	}

	/** 
	 * Given an array of existing interlanguage links, returns those links which are not
	 * in $this and thus should be deleted.
	 * @private
	 */
	function getInterlangDeletions( $existing ) {
	    return array_diff_assoc( $existing, $this->mInterlangs );
	}

	/**
	 * Get an array of existing links, as a 2-D array
	 * @private
	 */
	function getExistingLinks() {
		$fname = 'LinksUpdate::getExistingLinks';
		$res = $this->mDb->select( 'pagelinks', array( 'pl_namespace', 'pl_title' ),
			array( 'pl_from' => $this->mId ), $fname, $this->mOptions );
		$arr = array();
		while ( $row = $this->mDb->fetchObject( $res ) ) {
			if ( !isset( $arr[$row->pl_namespace] ) ) {
				$arr[$row->pl_namespace] = array();
			}
			$arr[$row->pl_namespace][$row->pl_title] = 1;
		}
		$this->mDb->freeResult( $res );
		return $arr;
	}

	/**
	 * Get an array of existing templates, as a 2-D array
	 * @private
	 */
	function getExistingTemplates() {
		$fname = 'LinksUpdate::getExistingTemplates';
		$res = $this->mDb->select( 'templatelinks', array( 'tl_namespace', 'tl_title' ),
			array( 'tl_from' => $this->mId ), $fname, $this->mOptions );
		$arr = array();
		while ( $row = $this->mDb->fetchObject( $res ) ) {
			if ( !isset( $arr[$row->tl_namespace] ) ) {
				$arr[$row->tl_namespace] = array();
			}
			$arr[$row->tl_namespace][$row->tl_title] = 1;
		}
		$this->mDb->freeResult( $res );
		return $arr;
	}

	/**
	 * Get an array of existing images, image names in the keys
	 * @private
	 */
	function getExistingImages() {
		$fname = 'LinksUpdate::getExistingImages';
		$res = $this->mDb->select( 'imagelinks', array( 'il_to' ),
			array( 'il_from' => $this->mId ), $fname, $this->mOptions );
		$arr = array();
		while ( $row = $this->mDb->fetchObject( $res ) ) {
			$arr[$row->il_to] = 1;
		}
		$this->mDb->freeResult( $res );
		return $arr;
	}

	/**
	 * Get an array of existing external links, URLs in the keys
	 * @private
	 */
	function getExistingExternals() {
		$fname = 'LinksUpdate::getExistingExternals';
		$res = $this->mDb->select( 'externallinks', array( 'el_to' ),
			array( 'el_from' => $this->mId ), $fname, $this->mOptions );
		$arr = array();
		while ( $row = $this->mDb->fetchObject( $res ) ) {
			$arr[$row->el_to] = 1;
		}
		$this->mDb->freeResult( $res );
		return $arr;
	}

	/**
	 * Get an array of existing categories, with the name in the key and sort key in the value.
	 * @private
	 */
	function getExistingCategories() {
		$fname = 'LinksUpdate::getExistingCategories';
		$res = $this->mDb->select( 'categorylinks', array( 'cl_to', 'cl_sortkey' ),
			array( 'cl_from' => $this->mId ), $fname, $this->mOptions );
		$arr = array();
		while ( $row = $this->mDb->fetchObject( $res ) ) {
			$arr[$row->cl_to] = $row->cl_sortkey;
		}
		$this->mDb->freeResult( $res );
		return $arr;
	}

	/**
	 * Get an array of existing interlanguage links, with the language code in the key and the 
	 * title in the value.
	 * @private
	 */
	function getExistingInterlangs() {
		$fname = 'LinksUpdate::getExistingInterlangs';
		$res = $this->mDb->select( 'langlinks', array( 'll_lang', 'll_title' ), 
			array( 'll_from' => $this->mId ), $fname, $this->mOptions );
		$arr = array();
		while ( $row = $this->mDb->fetchObject( $res ) ) {
			$arr[$row->ll_lang] = $row->ll_title;
		}
		return $arr;
	}
}
?>