From ca32f08966f1b51fcb19460f0996bb0c4048e6fe Mon Sep 17 00:00:00 2001 From: Pierre Schmitz Date: Sat, 3 Dec 2011 13:29:22 +0100 Subject: Update to MediaWiki 1.18.0 * also update ArchLinux skin to chagnes in MonoBook * Use only css to hide our menu bar when printing --- includes/media/IPTC.php | 576 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 576 insertions(+) create mode 100644 includes/media/IPTC.php (limited to 'includes/media/IPTC.php') diff --git a/includes/media/IPTC.php b/includes/media/IPTC.php new file mode 100644 index 00000000..1d19791c --- /dev/null +++ b/includes/media/IPTC.php @@ -0,0 +1,576 @@ + $val ) { + if ( isset( $val[0] ) && trim($val[0]) == '' ) { + wfDebugLog('iptc', "IPTC tag $tag had only whitespace as its value."); + continue; + } + switch( $tag ) { + case '2#120': /*IPTC caption. mapped with exif ImageDescription*/ + $data['ImageDescription'] = self::convIPTC( $val, $c ); + break; + case '2#116': /* copyright. Mapped with exif copyright */ + $data['Copyright'] = self::convIPTC( $val, $c ); + break; + case '2#080': /* byline. Mapped with exif Artist */ + /* merge with byline title (2:85) + * like how exif does it with + * Title, person. Not sure if this is best + * approach since we no longer have the two fields + * separate. each byline title entry corresponds to a + * specific byline. */ + + $bylines = self::convIPTC( $val, $c ); + if ( isset( $parsed['2#085'] ) ) { + $titles = self::convIPTC( $parsed['2#085'], $c ); + } else { + $titles = array(); + } + + for ( $i = 0; $i < count( $titles ); $i++ ) { + if ( isset( $bylines[$i] ) ) { + // theoretically this should always be set + // but doesn't hurt to be careful. + $bylines[$i] = $titles[$i] . ', ' . $bylines[$i]; + } + } + $data['Artist'] = $bylines; + break; + case '2#025': /* keywords */ + $data['Keywords'] = self::convIPTC( $val, $c ); + break; + case '2#101': /* Country (shown)*/ + $data['CountryDest'] = self::convIPTC( $val, $c ); + break; + case '2#095': /* state/province (shown) */ + $data['ProvinceOrStateDest'] = self::convIPTC( $val, $c ); + break; + case '2#090': /* city (Shown) */ + $data['CityDest'] = self::convIPTC( $val, $c ); + break; + case '2#092': /* sublocation (shown) */ + $data['SublocationDest'] = self::convIPTC( $val, $c ); + break; + case '2#005': /* object name/title */ + $data['ObjectName'] = self::convIPTC( $val, $c ); + break; + case '2#040': /* special instructions */ + $data['SpecialInstructions'] = self::convIPTC( $val, $c ); + break; + case '2#105': /* headline*/ + $data['Headline'] = self::convIPTC( $val, $c ); + break; + case '2#110': /* credit */ + /*"Identifies the provider of the objectdata, + * not necessarily the owner/creator". */ + $data['Credit'] = self::convIPTC( $val, $c ); + break; + case '2#115': /* source */ + /* "Identifies the original owner of the intellectual content of the + *objectdata. This could be an agency, a member of an agency or + *an individual." */ + $data['Source'] = self::convIPTC( $val, $c ); + break; + + case '2#007': /* edit status (lead, correction, etc) */ + $data['EditStatus'] = self::convIPTC( $val, $c ); + break; + case '2#015': /* category. deprecated. max 3 letters in theory, often more */ + $data['iimCategory'] = self::convIPTC( $val, $c ); + break; + case '2#020': /* category. deprecated. */ + $data['iimSupplementalCategory'] = self::convIPTC( $val, $c ); + break; + case '2#010': /*urgency (1-8. 1 most, 5 normal, 8 low priority)*/ + $data['Urgency'] = self::convIPTC( $val, $c ); + break; + case '2#022': + /* "Identifies objectdata that recurs often and predictably... + * Example: Euroweather" */ + $data['FixtureIdentifier'] = self::convIPTC( $val, $c ); + break; + case '2#026': + /* Content location code (iso 3166 + some custom things) + * ex: TUR (for turkey), XUN (for UN), XSP (outer space) + * See wikipedia article on iso 3166 and appendix D of iim std. */ + $data['LocationDestCode'] = self::convIPTC( $val, $c ); + break; + case '2#027': + /* Content location name. Full printable name + * of location of photo. */ + $data['LocationDest'] = self::convIPTC( $val, $c ); + break; + case '2#065': + /* Originating Program. + * Combine with Program version (2:70) if present. + */ + $software = self::convIPTC( $val, $c ); + + if ( count( $software ) !== 1 ) { + //according to iim standard this cannot have multiple values + //so if there is more than one, something weird is happening, + //and we skip it. + wfDebugLog( 'iptc', 'IPTC: Wrong count on 2:65 Software field' ); + break; + } + + if ( isset( $parsed['2#070'] ) ) { + //if a version is set for the software. + $softwareVersion = self::convIPTC( $parsed['2#070'], $c ); + unset($parsed['2#070']); + $data['Software'] = array( array( $software[0], $softwareVersion[0] ) ); + } else { + $data['Software'] = $software; + } + break; + case '2#075': + /* Object cycle. + * a for morning (am), p for evening, b for both */ + $data['ObjectCycle'] = self::convIPTC( $val, $c ); + break; + case '2#100': + /* Country/Primary location code. + * "Indicates the code of the country/primary location where the + * intellectual property of the objectdata was created" + * unclear how this differs from 2#026 + */ + $data['CountryCodeDest'] = self::convIPTC( $val, $c ); + break; + case '2#103': + /* original transmission ref. + * "A code representing the location of original transmission ac- + * cording to practises of the provider." + */ + $data['OriginalTransmissionRef'] = self::convIPTC( $val, $c ); + break; + case '2#118': /*contact*/ + $data['Contact'] = self::convIPTC( $val, $c ); + break; + case '2#122': + /* Writer/Editor + * "Identification of the name of the person involved in the writing, + * editing or correcting the objectdata or caption/abstract." + */ + $data['Writer'] = self::convIPTC( $val, $c ); + break; + case '2#135': /* lang code */ + $data['LanguageCode'] = self::convIPTC( $val, $c ); + break; + + // Start date stuff. + // It doesn't accept incomplete dates even though they are valid + // according to spec. + // Should potentially store timezone as well. + case '2#055': + //Date created (not date digitized). + //Maps to exif DateTimeOriginal + if ( isset( $parsed['2#060'] ) ) { + $time = $parsed['2#060']; + } else { + $time = Array(); + } + $timestamp = self::timeHelper( $val, $time, $c ); + if ($timestamp) { + $data['DateTimeOriginal'] = $timestamp; + } + break; + + case '2#062': + //Date converted to digital representation. + //Maps to exif DateTimeDigitized + if ( isset( $parsed['2#063'] ) ) { + $time = $parsed['2#063']; + } else { + $time = Array(); + } + $timestamp = self::timeHelper( $val, $time, $c ); + if ($timestamp) { + $data['DateTimeDigitized'] = $timestamp; + } + break; + + case '2#030': + //Date released. + if ( isset( $parsed['2#035'] ) ) { + $time = $parsed['2#035']; + } else { + $time = Array(); + } + $timestamp = self::timeHelper( $val, $time, $c ); + if ($timestamp) { + $data['DateTimeReleased'] = $timestamp; + } + break; + + case '2#037': + //Date expires. + if ( isset( $parsed['2#038'] ) ) { + $time = $parsed['2#038']; + } else { + $time = Array(); + } + $timestamp = self::timeHelper( $val, $time, $c ); + if ($timestamp) { + $data['DateTimeExpires'] = $timestamp; + } + break; + + case '2#000': /* iim version */ + // unlike other tags, this is a 2-byte binary number. + //technically this is required if there is iptc data + //but in practise it isn't always there. + if ( strlen( $val[0] ) == 2 ) { + //if is just to be paranoid. + $versionValue = ord( substr( $val[0], 0, 1 ) ) * 256; + $versionValue += ord( substr( $val[0], 1, 1 ) ); + $data['iimVersion'] = $versionValue; + } + break; + + case '2#004': + // IntellectualGenere. + // first 4 characters are an id code + // That we're not really interested in. + + // This prop is weird, since it's + // allowed to have multiple values + // in iim 4.1, but not in the XMP + // stuff. We're going to just + // extract the first value. + $con = self::ConvIPTC( $val, $c ); + if ( strlen( $con[0] ) < 5 ) { + wfDebugLog( 'iptc', 'IPTC: ' + . '2:04 too short. ' + . 'Ignoring.' ); + break; + } + $extracted = substr( $con[0], 4 ); + $data['IntellectualGenre'] = $extracted; + break; + + case '2#012': + // Subject News code - this is a compound field + // at the moment we only extract the subject news + // code, which is an 8 digit (ascii) number + // describing the subject matter of the content. + $codes = self::convIPTC( $val, $c ); + foreach ( $codes as $ic ) { + $fields = explode(':', $ic, 3 ); + + if ( count( $fields ) < 2 || + $fields[0] !== 'IPTC' ) + { + wfDebugLog( 'IPTC', 'IPTC: ' + . 'Invalid 2:12 - ' . $ic ); + break; + } + $data['SubjectNewsCode'] = $fields[1]; + } + break; + + // purposely does not do 2:125, 2:130, 2:131, + // 2:47, 2:50, 2:45, 2:42, 2:8, 2:3 + // 2:200, 2:201, 2:202 + // or the audio stuff (2:150 to 2:154) + + case '2#070': + case '2#060': + case '2#063': + case '2#085': + case '2#038': + case '2#035': + //ignore. Handled elsewhere. + break; + + default: + wfDebugLog( 'iptc', "Unsupported iptc tag: $tag. Value: " . implode( ',', $val )); + break; + } + + } + return $data; + } + + /** + * Convert an iptc date and time tags into the exif format + * + * @todo Potentially this should also capture the timezone offset. + * @param Array $date The date tag + * @param Array $time The time tag + * @param $c + * @return String Date in exif format. + */ + private static function timeHelper( $date, $time, $c ) { + if ( count( $date ) === 1 ) { + //the standard says this should always be 1 + //just double checking. + list($date) = self::convIPTC( $date, $c ); + } else { + return null; + } + + if ( count( $time ) === 1 ) { + list($time) = self::convIPTC( $time, $c ); + $dateOnly = false; + } else { + $time = '000000+0000'; //placeholder + $dateOnly = true; + } + + if ( ! ( preg_match('/\d\d\d\d\d\d[-+]\d\d\d\d/', $time) + && preg_match('/\d\d\d\d\d\d\d\d/', $date) + && substr($date, 0, 4) !== '0000' + && substr($date, 4, 2) !== '00' + && substr($date, 6, 2) !== '00' + ) ) { + //something wrong. + // Note, this rejects some valid dates according to iptc spec + // for example: the date 00000400 means the photo was taken in + // April, but the year and day is unknown. We don't process these + // types of incomplete dates atm. + wfDebugLog( 'iptc', "IPTC: invalid time ( $time ) or date ( $date )"); + return null; + } + + $unixTS = wfTimestamp( TS_UNIX, $date . substr( $time, 0, 6 )); + if ( $unixTS === false ) { + wfDebugLog( 'iptc', "IPTC: can't convert date to TS_UNIX: $date $time." ); + return null; + } + + $tz = ( intval( substr( $time, 7, 2 ) ) *60*60 ) + + ( intval( substr( $time, 9, 2 ) ) * 60 ); + + if ( substr( $time, 6, 1 ) === '-' ) { + $tz = - $tz; + } + + $finalTimestamp = wfTimestamp( TS_EXIF, $unixTS + $tz ); + if ( $finalTimestamp === false ) { + wfDebugLog( 'iptc', "IPTC: can't make final timestamp. Date: " . ( $unixTS + $tz ) ); + return null; + } + if ( $dateOnly ) { + //return the date only + return substr( $finalTimestamp, 0, 10 ); + } else { + return $finalTimestamp; + } + } + + /** + * Helper function to convert charset for iptc values. + * @param $data Mixed String or Array: The iptc string + * @param $charset String: The charset + * + * @return string + */ + private static function convIPTC ( $data, $charset ) { + if ( is_array( $data ) ) { + foreach ($data as &$val) { + $val = self::convIPTCHelper( $val, $charset ); + } + } else { + $data = self::convIPTCHelper( $data, $charset ); + } + + return $data; + } + /** + * Helper function of a helper function to convert charset for iptc values. + * @param $data Mixed String or Array: The iptc string + * @param $charset String: The charset + * + * @return string + */ + private static function convIPTCHelper ( $data, $charset ) { + if ( $charset ) { + wfSuppressWarnings(); + $data = iconv($charset, "UTF-8//IGNORE", $data); + wfRestoreWarnings(); + if ($data === false) { + $data = ""; + wfDebugLog('iptc', __METHOD__ . " Error converting iptc data charset $charset to utf-8"); + } + } else { + //treat as utf-8 if is valid utf-8. otherwise pretend its windows-1252 + // most of the time if there is no 1:90 tag, it is either ascii, latin1, or utf-8 + $oldData = $data; + UtfNormal::quickIsNFCVerify( $data ); //make $data valid utf-8 + if ($data === $oldData) { + return $data; //if validation didn't change $data + } else { + return self::convIPTCHelper( $oldData, 'Windows-1252' ); + } + } + return trim( $data ); + } + + /** + * take the value of 1:90 tag and returns a charset + * @param String $tag 1:90 tag. + * @return string charset name or "?" + * Warning, this function does not (and is not intended to) detect + * all iso 2022 escape codes. In practise, the code for utf-8 is the + * only code that seems to have wide use. It does detect that code. + */ + static function getCharset($tag) { + + //According to iim standard, charset is defined by the tag 1:90. + //in which there are iso 2022 escape sequences to specify the character set. + //the iim standard seems to encourage that all necessary escape sequences are + //in the 1:90 tag, but says it doesn't have to be. + + //This is in need of more testing probably. This is definitely not complete. + //however reading the docs of some other iptc software, it appears that most iptc software + //only recognizes utf-8. If 1:90 tag is not present content is + // usually ascii or iso-8859-1 (and sometimes utf-8), but no guarantee. + + //This also won't work if there are more than one escape sequence in the 1:90 tag + //or if something is put in the G2, or G3 charsets, etc. It will only reliably recognize utf-8. + + // This is just going through the charsets mentioned in appendix C of the iim standard. + + // \x1b = ESC. + switch ( $tag ) { + case "\x1b%G": //utf-8 + //Also call things that are compatible with utf-8, utf-8 (e.g. ascii) + case "\x1b(B": // ascii + case "\x1b(@": // iso-646-IRV (ascii in latest version, $ different in older version) + $c = 'UTF-8'; + break; + case "\x1b(A": //like ascii, but british. + $c = 'ISO646-GB'; + break; + case "\x1b(C": //some obscure sweedish/finland encoding + $c = 'ISO-IR-8-1'; + break; + case "\x1b(D": + $c = 'ISO-IR-8-2'; + break; + case "\x1b(E": //some obscure danish/norway encoding + $c = 'ISO-IR-9-1'; + break; + case "\x1b(F": + $c = 'ISO-IR-9-2'; + break; + case "\x1b(G": + $c = 'SEN_850200_B'; // aka iso 646-SE; ascii-like + break; + case "\x1b(I": + $c = "ISO646-IT"; + break; + case "\x1b(L": + $c = "ISO646-PT"; + break; + case "\x1b(Z": + $c = "ISO646-ES"; + break; + case "\x1b([": + $c = "GREEK7-OLD"; + break; + case "\x1b(K": + $c = "ISO646-DE"; + break; + case "\x1b(N": //crylic + $c = "ISO_5427"; + break; + case "\x1b(`": //iso646-NO + $c = "NS_4551-1"; + break; + case "\x1b(f": //iso646-FR + $c = "NF_Z_62-010"; + break; + case "\x1b(g": + $c = "PT2"; //iso646-PT2 + break; + case "\x1b(h": + $c = "ES2"; + break; + case "\x1b(i": //iso646-HU + $c = "MSZ_7795.3"; + break; + case "\x1b(w": + $c = "CSA_Z243.4-1985-1"; + break; + case "\x1b(x": + $c = "CSA_Z243.4-1985-2"; + break; + case "\x1b\$(B": + case "\x1b\$B": + case "\x1b&@\x1b\$B": + case "\x1b&@\x1b\$(B": + $c = "JIS_C6226-1983"; + break; + case "\x1b-A": // iso-8859-1. at least for the high code characters. + case "\x1b(@\x1b-A": + case "\x1b(B\x1b-A": + $c = 'ISO-8859-1'; + break; + case "\x1b-B": // iso-8859-2. at least for the high code characters. + $c = 'ISO-8859-2'; + break; + case "\x1b-C": // iso-8859-3. at least for the high code characters. + $c = 'ISO-8859-3'; + break; + case "\x1b-D": // iso-8859-4. at least for the high code characters. + $c = 'ISO-8859-4'; + break; + case "\x1b-E": // iso-8859-5. at least for the high code characters. + $c = 'ISO-8859-5'; + break; + case "\x1b-F": // iso-8859-6. at least for the high code characters. + $c = 'ISO-8859-6'; + break; + case "\x1b-G": // iso-8859-7. at least for the high code characters. + $c = 'ISO-8859-7'; + break; + case "\x1b-H": // iso-8859-8. at least for the high code characters. + $c = 'ISO-8859-8'; + break; + case "\x1b-I": // CSN_369103. at least for the high code characters. + $c = 'CSN_369103'; + break; + default: + wfDebugLog('iptc', __METHOD__ . 'Unknown charset in iptc 1:90: ' . bin2hex( $tag ) ); + //at this point just give up and refuse to parse iptc? + $c = false; + } + return $c; + } +} -- cgit v1.2.3-54-g00ecf