summaryrefslogtreecommitdiff
path: root/includes/BacklinkCache.php
blob: 53f92dd95820bcd08a1aa1d5fcbd07ba7332ff2e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
<?php
/**
 * Class for fetching backlink lists, approximate backlink counts and partitions.
 * Instances of this class should typically be fetched with $title->getBacklinkCache().
 *
 * Ideally you should only get your backlinks from here when you think there is some
 * advantage in caching them. Otherwise it's just a waste of memory.
 *
 * Caching layers used by this class:
 *  - $fullResultCache: the complete (unranged) DB result per links table, in process memory
 *  - $partitionCache: batch boundaries per links table and batch size, in process memory
 *  - $wgMemc: batch boundaries, stored for CACHE_EXPIRY seconds
 */
class BacklinkCache {
	/**
	 * Partition data, indexed as $partitionCache[$table][$batchSize]. Each entry is
	 * array( 'numRows' => <row count>, 'batches' => array( array( $start, $end ), ... ) )
	 */
	public $partitionCache = array();

	/** Full (unranged) query results, keyed by links table name */
	public $fullResultCache = array();

	/**
	 * The title whose backlinks are fetched. This class calls getNamespace(),
	 * getDBkey() and getPrefixedDBkey() on it.
	 */
	public $title;

	/** Database object; created lazily by getDB() unless injected with setDB() */
	public $db;

	/** Expiry, in seconds, of the partition data saved to $wgMemc */
	const CACHE_EXPIRY = 3600;

	/**
	 * Create a new BacklinkCache
	 * @param $title The title to fetch backlinks for
	 */
	public function __construct( $title ) {
		$this->title = $title;
	}

	/**
	 * Clear locally stored data: both in-process caches and the database object.
	 * Data saved to $wgMemc is not touched.
	 */
	public function clear() {
		$this->partitionCache = array();
		$this->fullResultCache = array();
		// Reset rather than unset(): the property stays declared, and getDB()
		// still sees it as "not set" because isset( null ) is false.
		$this->db = null;
	}

	/**
	 * Set the Database object to use
	 * @param $db Database object; must provide select()
	 */
	public function setDB( $db ) {
		$this->db = $db;
	}

	/**
	 * Get the Database object to use, falling back to wfGetDB( DB_SLAVE ) if
	 * none has been set with setDB().
	 */
	protected function getDB() {
		if ( !isset( $this->db ) ) {
			$this->db = wfGetDB( DB_SLAVE );
		}
		return $this->db;
	}

	/**
	 * Get the backlinks for a given table. Cached in process memory only.
	 * Only the full (unranged) result is cached; a request with a start and/or
	 * end ID always goes to the database.
	 *
	 * @param $table String
	 * @param $startId Integer or false
	 * @param $endId Integer or false
	 * @return TitleArray
	 * @throws MWException if $table is not a known links table
	 */
	public function getLinks( $table, $startId = false, $endId = false ) {
		// Resolve the prefix before wfProfileIn(): an invalid table makes
		// getPrefix() throw, and that must not leave an unmatched profile-in.
		$fromField = $this->getPrefix( $table ) . '_from';

		wfProfileIn( __METHOD__ );

		if ( $startId || $endId ) {
			// Partial range, not cached
			wfDebug( __METHOD__ . ": from DB (uncacheable range)\n" );
			$conds = $this->getConditions( $table );
			// Use the from field in the condition rather than the joined page_id,
			// because databases don't necessarily propagate indexes.
			if ( $startId ) {
				$conds[] = "$fromField >= " . intval( $startId );
			}
			if ( $endId ) {
				$conds[] = "$fromField <= " . intval( $endId );
			}
			$res = $this->queryLinks( $table, $conds, $fromField, __METHOD__ );
			$ta = TitleArray::newFromResult( $res );
			wfProfileOut( __METHOD__ );
			return $ta;
		}

		if ( !isset( $this->fullResultCache[$table] ) ) {
			wfDebug( __METHOD__ . ": from DB\n" );
			$this->fullResultCache[$table] = $this->queryLinks(
				$table,
				$this->getConditions( $table ),
				$fromField,
				__METHOD__
			);
		}
		$ta = TitleArray::newFromResult( $this->fullResultCache[$table] );
		wfProfileOut( __METHOD__ );
		return $ta;
	}

	/**
	 * Run the backlink SELECT shared by the ranged and unranged paths of getLinks().
	 *
	 * @param $table String: the links table name
	 * @param $conds Array: conditions, including the join on the page table
	 * @param $fromField String: the "<prefix>_from" field, used for ordering
	 * @param $fname String: calling method name, passed on to select()
	 * @return The result of select() on the Database object
	 */
	protected function queryLinks( $table, $conds, $fromField, $fname ) {
		return $this->getDB()->select(
			array( $table, 'page' ),
			array( 'page_namespace', 'page_title', 'page_id' ),
			$conds,
			$fname,
			array(
				'STRAIGHT_JOIN',
				'ORDER BY' => $fromField,
			)
		);
	}

	/**
	 * Get the field name prefix for a given table
	 * @param $table String
	 * @return String
	 * @throws MWException if $table is not a known links table
	 */
	protected function getPrefix( $table ) {
		static $prefixes = array(
			'pagelinks' => 'pl',
			'imagelinks' => 'il',
			'categorylinks' => 'cl',
			'templatelinks' => 'tl',
			'redirect' => 'rd',
		);

		if ( isset( $prefixes[$table] ) ) {
			return $prefixes[$table];
		} else {
			throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
		}
	}

	/**
	 * Get the SQL condition array for selecting backlinks, with a join on the page table
	 * @param $table String
	 * @return Array
	 * @throws MWException if $table is not a known links table
	 */
	protected function getConditions( $table ) {
		$prefix = $this->getPrefix( $table );

		switch ( $table ) {
			case 'pagelinks':
			case 'templatelinks':
			case 'redirect':
				// These tables identify the target by namespace and title
				$conds = array(
					"{$prefix}_namespace" => $this->title->getNamespace(),
					"{$prefix}_title" => $this->title->getDBkey(),
					"page_id={$prefix}_from"
				);
				break;
			case 'imagelinks':
				// Target is identified by DB key only
				$conds = array(
					'il_to' => $this->title->getDBkey(),
					'page_id=il_from'
				);
				break;
			case 'categorylinks':
				// Target is identified by DB key only
				$conds = array(
					'cl_to' => $this->title->getDBkey(),
					'page_id=cl_from',
				);
				break;
			default:
				throw new MWException( "Invalid table \"$table\" in " . __CLASS__ );
		}
		return $conds;
	}

	/**
	 * Get the approximate number of backlinks.
	 * The count is taken from the cached full result if there is one, otherwise
	 * from any cached partition entry for the table (which may have been loaded
	 * from $wgMemc), otherwise from a fresh getLinks() call.
	 *
	 * @param $table String
	 * @return Integer
	 */
	public function getNumLinks( $table ) {
		if ( isset( $this->fullResultCache[$table] ) ) {
			return $this->fullResultCache[$table]->numRows();
		}

		if ( isset( $this->partitionCache[$table] ) && is_array( $this->partitionCache[$table] ) ) {
			// Any batch size will do, every entry carries the row count
			$entry = reset( $this->partitionCache[$table] );
			// $partitionCache is public, so don't trust the entry's shape blindly
			if ( is_array( $entry ) && isset( $entry['numRows'] ) ) {
				return $entry['numRows'];
			}
		}

		$titleArray = $this->getLinks( $table );
		return $titleArray->count();
	}

	/**
	 * Partition the backlinks into batches.
	 * Returns an array giving the start and end of each range. The first batch has
	 * a start of false, and the last batch has an end of false.
	 *
	 * Lookup order: partition cache, full result cache, $wgMemc, database.
	 * A cache entry is only stored once it has been fully computed, so a failure
	 * part-way through (e.g. a database error) cannot leave a bogus entry behind.
	 *
	 * @param $table String: the links table name
	 * @param $batchSize Integer: must be greater than zero
	 * @return Array
	 * @throws MWException if $batchSize is not a positive number
	 */
	public function partition( $table, $batchSize ) {
		// partitionResult() divides by the batch size
		if ( !is_numeric( $batchSize ) || $batchSize <= 0 ) {
			throw new MWException( __METHOD__ . ": Invalid batch size \"$batchSize\"" );
		}

		// Try cache
		if ( isset( $this->partitionCache[$table][$batchSize] ) ) {
			wfDebug( __METHOD__ . ": got from partition cache\n" );
			return $this->partitionCache[$table][$batchSize]['batches'];
		}

		// Try full result cache
		if ( isset( $this->fullResultCache[$table] ) ) {
			$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
			$this->partitionCache[$table][$batchSize] = $cacheEntry;
			wfDebug( __METHOD__ . ": got from full result cache\n" );
			return $cacheEntry['batches'];
		}

		// Try memcached
		global $wgMemc;
		$memcKey = wfMemcKey(
			'backlinks',
			md5( $this->title->getPrefixedDBkey() ),
			$table,
			$batchSize
		);
		$memcValue = $wgMemc->get( $memcKey );

		// Only accept a value of the shape this class saves; anything else
		// is ignored and recomputed from the database below.
		if ( is_array( $memcValue ) && isset( $memcValue['batches'], $memcValue['numRows'] ) ) {
			$this->partitionCache[$table][$batchSize] = $memcValue;
			wfDebug( __METHOD__ . ": got from memcached $memcKey\n" );
			return $memcValue['batches'];
		}

		// Fetch from database; this fills $this->fullResultCache[$table]
		$this->getLinks( $table );
		$cacheEntry = $this->partitionResult( $this->fullResultCache[$table], $batchSize );
		$this->partitionCache[$table][$batchSize] = $cacheEntry;
		// Save to memcached
		$wgMemc->set( $memcKey, $cacheEntry, self::CACHE_EXPIRY );
		wfDebug( __METHOD__ . ": got from database\n" );
		return $cacheEntry['batches'];
	}

	/**
	 * Partition a DB result with backlinks in it into batches.
	 * The rows are split into ceil( numRows / batchSize ) batches of roughly equal
	 * size. The boundaries are read by seeking within $res, so its position is
	 * changed by this call.
	 *
	 * @param $res Result object providing numRows(), seek() and fetchObject(),
	 *   with a page_id field in each row
	 * @param $batchSize Integer: must be greater than zero (checked by partition())
	 * @return Array with keys 'numRows' and 'batches'
	 * @throws MWException if the rows are not in ascending page_id order
	 */
	protected function partitionResult( $res, $batchSize ) {
		$batches = array();
		$numRows = $res->numRows();
		$numBatches = ceil( $numRows / $batchSize );

		for ( $i = 0; $i < $numBatches; $i++ ) {
			if ( $i == 0 ) {
				// The first batch is open-ended at the start
				$start = false;
			} else {
				$rowNum = intval( $numRows * $i / $numBatches );
				$res->seek( $rowNum );
				$row = $res->fetchObject();
				$start = $row->page_id;
			}

			if ( $i == $numBatches - 1 ) {
				// The last batch is open-ended at the end
				$end = false;
			} else {
				// Ends just before the first row of the next batch
				$rowNum = intval( $numRows * ( $i + 1 ) / $numBatches );
				$res->seek( $rowNum );
				$row = $res->fetchObject();
				$end = $row->page_id - 1;
			}

			# Sanity check order
			if ( $start && $end && $start > $end ) {
				throw new MWException( __METHOD__ . ': Internal error: query result out of order' );
			}

			$batches[] = array( $start, $end );
		}
		return array( 'numRows' => $numRows, 'batches' => $batches );
	}
}