summaryrefslogtreecommitdiff
path: root/includes/media/GIFMetadataExtractor.php
blob: 178b0bf7a072e29400081a560f95715762ddf62b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
<?php
/**
 * GIF frame counter.
 *
 * Originally written in Perl by Steve Sanbeg.
 * Ported to PHP by Andrew Garrett
 * Deliberately not using MWExceptions to avoid external dependencies, encouraging
 * redistribution.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Media
 */

/**
 * GIF frame counter.
 *
 * @ingroup Media
 */
class GIFMetadataExtractor {
	/** @var string */
	private static $gifFrameSep;

	/** @var string */
	private static $gifExtensionSep;

	/** @var string */
	private static $gifTerm;

	const VERSION = 1;

	// Each sub-block is less than or equal to 255 bytes.
	// Most of the time its 255 bytes, except for in XMP
	// blocks, where it's usually between 32-127 bytes each.
	const MAX_SUBBLOCKS = 262144; // 5mb divided by 20.

	/**
	 * @throws Exception
	 * @param string $filename
	 * @return array
	 */
	static function getMetadata( $filename ) {
		self::$gifFrameSep = pack( "C", ord( "," ) );
		self::$gifExtensionSep = pack( "C", ord( "!" ) );
		self::$gifTerm = pack( "C", ord( ";" ) );

		$frameCount = 0;
		$duration = 0.0;
		$isLooped = false;
		$xmp = "";
		$comment = array();

		if ( !$filename ) {
			throw new Exception( "No file name specified" );
		} elseif ( !file_exists( $filename ) || is_dir( $filename ) ) {
			throw new Exception( "File $filename does not exist" );
		}

		$fh = fopen( $filename, 'rb' );

		if ( !$fh ) {
			throw new Exception( "Unable to open file $filename" );
		}

		// Check for the GIF header
		$buf = fread( $fh, 6 );
		if ( !( $buf == 'GIF87a' || $buf == 'GIF89a' ) ) {
			throw new Exception( "Not a valid GIF file; header: $buf" );
		}

		// Skip over width and height.
		fread( $fh, 4 );

		// Read BPP
		$buf = fread( $fh, 1 );
		$bpp = self::decodeBPP( $buf );

		// Skip over background and aspect ratio
		fread( $fh, 2 );

		// Skip over the GCT
		self::readGCT( $fh, $bpp );

		while ( !feof( $fh ) ) {
			$buf = fread( $fh, 1 );

			if ( $buf == self::$gifFrameSep ) {
				// Found a frame
				$frameCount++;

				## Skip bounding box
				fread( $fh, 8 );

				## Read BPP
				$buf = fread( $fh, 1 );
				$bpp = self::decodeBPP( $buf );

				## Read GCT
				self::readGCT( $fh, $bpp );
				fread( $fh, 1 );
				self::skipBlock( $fh );
			} elseif ( $buf == self::$gifExtensionSep ) {
				$buf = fread( $fh, 1 );
				if ( strlen( $buf ) < 1 ) {
					throw new Exception( "Ran out of input" );
				}
				$extension_code = unpack( 'C', $buf );
				$extension_code = $extension_code[1];

				if ( $extension_code == 0xF9 ) {
					// Graphics Control Extension.
					fread( $fh, 1 ); // Block size

					fread( $fh, 1 ); // Transparency, disposal method, user input

					$buf = fread( $fh, 2 ); // Delay, in hundredths of seconds.
					if ( strlen( $buf ) < 2 ) {
						throw new Exception( "Ran out of input" );
					}
					$delay = unpack( 'v', $buf );
					$delay = $delay[1];
					$duration += $delay * 0.01;

					fread( $fh, 1 ); // Transparent colour index

					$term = fread( $fh, 1 ); // Should be a terminator
					if ( strlen( $term ) < 1 ) {
						throw new Exception( "Ran out of input" );
					}
					$term = unpack( 'C', $term );
					$term = $term[1];
					if ( $term != 0 ) {
						throw new Exception( "Malformed Graphics Control Extension block" );
					}
				} elseif ( $extension_code == 0xFE ) {
					// Comment block(s).
					$data = self::readBlock( $fh );
					if ( $data === "" ) {
						throw new Exception( 'Read error, zero-length comment block' );
					}

					// The standard says this should be ASCII, however its unclear if
					// thats true in practise. Check to see if its valid utf-8, if so
					// assume its that, otherwise assume its windows-1252 (iso-8859-1)
					$dataCopy = $data;
					// quickIsNFCVerify has the side effect of replacing any invalid characters
					UtfNormal::quickIsNFCVerify( $dataCopy );

					if ( $dataCopy !== $data ) {
						wfSuppressWarnings();
						$data = iconv( 'windows-1252', 'UTF-8', $data );
						wfRestoreWarnings();
					}

					$commentCount = count( $comment );
					if ( $commentCount === 0
						|| $comment[$commentCount - 1] !== $data
					) {
						// Some applications repeat the same comment on each
						// frame of an animated GIF image, so if this comment
						// is identical to the last, only extract once.
						$comment[] = $data;
					}
				} elseif ( $extension_code == 0xFF ) {
					// Application extension (Netscape info about the animated gif)
					// or XMP (or theoretically any other type of extension block)
					$blockLength = fread( $fh, 1 );
					if ( strlen( $blockLength ) < 1 ) {
						throw new Exception( "Ran out of input" );
					}
					$blockLength = unpack( 'C', $blockLength );
					$blockLength = $blockLength[1];
					$data = fread( $fh, $blockLength );

					if ( $blockLength != 11 ) {
						wfDebug( __METHOD__ . " GIF application block with wrong length\n" );
						fseek( $fh, -( $blockLength + 1 ), SEEK_CUR );
						self::skipBlock( $fh );
						continue;
					}

					// NETSCAPE2.0 (application name for animated gif)
					if ( $data == 'NETSCAPE2.0' ) {
						$data = fread( $fh, 2 ); // Block length and introduction, should be 03 01

						if ( $data != "\x03\x01" ) {
							throw new Exception( "Expected \x03\x01, got $data" );
						}

						// Unsigned little-endian integer, loop count or zero for "forever"
						$loopData = fread( $fh, 2 );
						if ( strlen( $loopData ) < 2 ) {
							throw new Exception( "Ran out of input" );
						}
						$loopData = unpack( 'v', $loopData );
						$loopCount = $loopData[1];

						if ( $loopCount != 1 ) {
							$isLooped = true;
						}

						// Read out terminator byte
						fread( $fh, 1 );
					} elseif ( $data == 'XMP DataXMP' ) {
						// application name for XMP data.
						// see pg 18 of XMP spec part 3.

						$xmp = self::readBlock( $fh, true );

						if ( substr( $xmp, -257, 3 ) !== "\x01\xFF\xFE"
							|| substr( $xmp, -4 ) !== "\x03\x02\x01\x00"
						) {
							// this is just a sanity check.
							throw new Exception( "XMP does not have magic trailer!" );
						}

						// strip out trailer.
						$xmp = substr( $xmp, 0, -257 );
					} else {
						// unrecognized extension block
						fseek( $fh, -( $blockLength + 1 ), SEEK_CUR );
						self::skipBlock( $fh );
						continue;
					}
				} else {
					self::skipBlock( $fh );
				}
			} elseif ( $buf == self::$gifTerm ) {
				break;
			} else {
				if ( strlen( $buf ) < 1 ) {
					throw new Exception( "Ran out of input" );
				}
				$byte = unpack( 'C', $buf );
				$byte = $byte[1];
				throw new Exception( "At position: " . ftell( $fh ) . ", Unknown byte " . $byte );
			}
		}

		return array(
			'frameCount' => $frameCount,
			'looped' => $isLooped,
			'duration' => $duration,
			'xmp' => $xmp,
			'comment' => $comment,
		);
	}

	/**
	 * @param resource $fh
	 * @param int $bpp
	 * @return void
	 */
	static function readGCT( $fh, $bpp ) {
		if ( $bpp > 0 ) {
			$max = pow( 2, $bpp );
			for ( $i = 1; $i <= $max; ++$i ) {
				fread( $fh, 3 );
			}
		}
	}

	/**
	 * @param string $data
	 * @throws Exception
	 * @return int
	 */
	static function decodeBPP( $data ) {
		if ( strlen( $data ) < 1 ) {
			throw new Exception( "Ran out of input" );
		}
		$buf = unpack( 'C', $data );
		$buf = $buf[1];
		$bpp = ( $buf & 7 ) + 1;
		$buf >>= 7;

		$have_map = $buf & 1;

		return $have_map ? $bpp : 0;
	}

	/**
	 * @param resource $fh
	 * @throws Exception
	 */
	static function skipBlock( $fh ) {
		while ( !feof( $fh ) ) {
			$buf = fread( $fh, 1 );
			if ( strlen( $buf ) < 1 ) {
				throw new Exception( "Ran out of input" );
			}
			$block_len = unpack( 'C', $buf );
			$block_len = $block_len[1];
			if ( $block_len == 0 ) {
				return;
			}
			fread( $fh, $block_len );
		}
	}

	/**
	 * Read a block. In the GIF format, a block is made up of
	 * several sub-blocks. Each sub block starts with one byte
	 * saying how long the sub-block is, followed by the sub-block.
	 * The entire block is terminated by a sub-block of length
	 * 0.
	 * @param resource $fh File handle
	 * @param bool $includeLengths Include the length bytes of the
	 *  sub-blocks in the returned value. Normally this is false,
	 *  except XMP is weird and does a hack where you need to keep
	 *  these length bytes.
	 * @throws Exception
	 * @return string The data.
	 */
	static function readBlock( $fh, $includeLengths = false ) {
		$data = '';
		$subLength = fread( $fh, 1 );
		$blocks = 0;

		while ( $subLength !== "\0" ) {
			$blocks++;
			if ( $blocks > self::MAX_SUBBLOCKS ) {
				throw new Exception( "MAX_SUBBLOCKS exceeded (over $blocks sub-blocks)" );
			}
			if ( feof( $fh ) ) {
				throw new Exception( "Read error: Unexpected EOF." );
			}
			if ( $includeLengths ) {
				$data .= $subLength;
			}

			$data .= fread( $fh, ord( $subLength ) );
			$subLength = fread( $fh, 1 );
		}

		return $data;
	}
}