/**
 * Base mw.TextSource object
 *
 * Wraps a text track source, loads its payload and parses it into a list of
 * caption objects ( "start", "end" and "content" fields ).
 *
 * @param {Object} source Source object to extend
 * @param {Object} textProvider [Optional] The text provider interface ( to load source from api )
 */
( function( mw, $ ) { "use strict";

	mw.TextSource = function( source, textProvider ) {
		// Forward the optional provider; init() documents and accepts it.
		return this.init( source, textProvider );
	};

	mw.TextSource.prototype = {

		// The load state:
		loaded: false,

		// Container for the captions
		// captions include "start", "end" and "content" fields
		// ( prototype default only; init() gives every instance its own array )
		captions: [],

		// The css style for captions ( some file formats specify display types )
		// ( prototype default only; init() gives every instance its own map )
		styleCss: {},

		// The previous index of the timed text served
		// Avoids searching the entire array on time updates.
		prevIndex: 0,

		/**
		 * @constructor Inherits mediaSource from embedPlayer
		 * @param {Object} source Base source element, all its enumerable properties are copied
		 * @param {Object} textProvider [Optional] Pointer to the textProvider
		 * @return {Object} this
		 */
		init: function( source, textProvider ) {
			// Per-instance containers: the prototype-level array / object would
			// otherwise be shared ( and mutated ) by every TextSource instance.
			this.captions = [];
			this.styleCss = {};
			this.prevIndex = 0;

			// Inherits mediaSource ( inherited properties are copied on purpose )
			for( var i in source ){
				this[ i ] = source[ i ];
			}

			// Set default category to subtitle if unset:
			if( !this.kind ) {
				this.kind = 'subtitle';
			}
			// Set the textProvider if provided
			if( textProvider ) {
				this.textProvider = textProvider;
			}
			return this;
		},

		/**
		 * Function to load and parse the source text
		 *
		 * @param {Function} callback Function called once text source is loaded
		 */
		load: function( callback ) {
			var _this = this;
			mw.log("TextSource:: load src "+ _this.getSrc() );

			// Setup up a callback ( in case it was not defined )
			if( !callback ){
				callback = function(){ return ; };
			}

			// Check if the captions have already been loaded:
			if( this.loaded ){
				return callback();
			}

			// Try to load src via XHR source
			if( !this.getSrc() ) {
				mw.log( "Error: TextSource no source url for text track");
				return callback();
			}

			// Check type for special loaders:
			$( mw ).triggerQueueCallback( 'TimedText_LoadTextSource', _this, function(){
				if( _this.loaded ){
					callback();
					return;
				}
				// if no module loaded the text source use the normal ajax proxy:
				new mw.ajaxProxy({
					url: _this.getSrc(),
					success: function( resultXML ) {
						_this.captions = _this.getCaptions( resultXML );
						_this.loaded = true;
						mw.log("mw.TextSource :: loaded from " + _this.getSrc() + " Found: " + _this.captions.length + ' captions' );
						callback();
					},
					error: function() {
						mw.log("Error: TextSource Error with http response");
						// Marked as loaded so later load() calls return without re-requesting
						_this.loaded = true;
						callback();
					}
				});
			});
		},

		/**
		 * Returns the text content for requested time
		 *
		 * @param {Number} time Time in seconds
		 * @return {Object} Map of caption index => caption for every caption in range
		 */
		getCaptionForTime: function ( time ) {
			var prevCaption = this.captions[ this.prevIndex ];
			var captionSet = {};

			// Resume at the previously served caption when time moved forward,
			// on a backwards seek start searching at the start:
			var startIndex = ( prevCaption && time >= prevCaption.start ) ? this.prevIndex : 0;

			// Index of the first caption in range ( -1 while none was found )
			var firstCapIndex = -1;

			// Start looking for the text via time, add all matches that are in range
			for( var i = startIndex; i < this.captions.length; i++ ) {
				var caption = this.captions[ i ];
				// Don't handle captions with 0 or -1 end time:
				if( caption.end == 0 || caption.end == -1 ) {
					continue;
				}
				if( time >= caption.start && time <= caption.end ) {
					// remember the earliest match as the next search start:
					if( firstCapIndex === -1 ){
						firstCapIndex = i;
					}
					captionSet[ i ] = caption;
				}
				// captions are stored in start order stop search if we get larger than time
				if( caption.start > time ){
					break;
				}
			}
			// Update the prevIndex ( an array index, not a time ); keep the
			// search start when nothing matched:
			this.prevIndex = ( firstCapIndex !== -1 ) ? firstCapIndex : startIndex;

			// Return the set of captions in range:
			return captionSet;
		},

		/**
		 * Check if the caption is an overlay format ( and must be ontop of the player )
		 *
		 * @return {Boolean} true when the mime type is 'text/xml'
		 */
		isOverlay: function(){
			return this.mimeType === 'text/xml';
		},

		/**
		 * Parse raw track data with the parser matching this.mimeType
		 *
		 * @param {String|Object} data Raw response of the text track request
		 * @return {Array} List of caption objects, empty if the mime type is unknown
		 */
		getCaptions: function( data ){
			// Detect caption data type:
			switch( this.mimeType ){
				case 'text/mw-srt':
					return this.getCaptiosnFromMediaWikiSrt( data );
				case 'text/x-srt':
					return this.getCaptionsFromSrt( data );
				case 'text/xml':
					return this.getCaptionsFromTMML( data );
			}
			// caption mime not found return empty set:
			return [];
		},

		/**
		 * Get the css object parsed for a given style id
		 *
		 * @param {String} styleId Id of the style
		 * @return {Object} css property map, empty object if the id is unknown
		 */
		getStyleCssById: function( styleId ){
			if( this.styleCss[ styleId ] ){
				return this.styleCss[ styleId ];
			}
			return {};
		},

		/**
		 * Grab timed text from TMML format
		 *
		 * Also fills this.styleCss with the styles found in the document.
		 *
		 * @param {String|Object} data TTML markup, or an already parsed document
		 * @return {Array} List of caption objects, empty on parse error
		 */
		getCaptionsFromTMML: function( data ){
			var _this = this;
			mw.log("TextSource::getCaptionsFromTMML", data);
			// set up display information:
			var captions = [];
			var xml;

			// Parse inside the try block: the parse call itself can throw on
			// malformed input, not only the parsererror lookup.
			try {
				xml = ( $( data ).find("tt").length ) ? data : $.parseXML( data );
				// Check for parse error:
				if( !xml || $( xml ).find('parsererror').length ){
					mw.log("Error: close caption parse error: " + $( xml ).find('parsererror').text() );
					return captions;
				}
			} catch ( e ) {
				mw.log( "Error: close caption parse error: " + e.toString() );
				return captions;
			}

			// Set the body Style
			var bodyStyleId = $( xml ).find('body').attr('style');

			// Set style translate ttml to css
			$( xml ).find( 'style').each( function( inx, style ){
				var cssObject = {};
				// Map CamelCase css properties:
				$( style.attributes ).each( function( inx, attr ){
					var attrName = attr.name;
					if( attrName.substr(0, 4) !== 'tts:' ){
						// skip
						return true;
					}
					// camelCase => dash-case ( after the "tts:" prefix )
					var cssName = '';
					for( var c = 4; c < attrName.length; c++ ){
						var ch = attrName.charAt( c );
						if( ch.toLowerCase() != ch ){
							cssName += '-' + ch.toLowerCase();
						} else {
							cssName += ch;
						}
					}
					cssObject[ cssName ] = attr.nodeValue;
				});
				_this.styleCss[ $( style ).attr('id') ] = cssObject;
			});

			$( xml ).find( 'p' ).each( function( inx, p ){
				// Get text content by converting ttml node to html
				var content = '';
				$.each( p.childNodes, function( inx, node ){
					content += _this.convertTTML2HTML( node );
				});

				// Get the end time:
				var end = null;
				if( $( p ).attr( 'end' ) ){
					end = mw.npt2seconds( $( p ).attr( 'end' ) );
				}
				// Look for dur
				if( !end && $( p ).attr( 'dur' ) ){
					end = mw.npt2seconds( $( p ).attr( 'begin' ) ) +
						mw.npt2seconds( $( p ).attr( 'dur' ) );
				}

				// Create the caption object :
				var captionObj = {
					'start': mw.npt2seconds( $( p ).attr( 'begin' ) ),
					'end': end,
					'content': content
				};

				// See if we have custom metadata for position of this caption object
				// there are 35 columns across and 15 rows high
				var $meta = $( p ).find( 'metadata' );
				if( $meta.length ){
					captionObj['css'] = {
						'position': 'absolute'
					};
					if( $meta.attr('cccol') ){
						captionObj['css']['left'] = ( $meta.attr('cccol') / 35 ) * 100 +'%';
					}
					if( $meta.attr('ccrow') ){
						captionObj['css']['top'] = ( $meta.attr('ccrow') / 15 ) * 100 +'%';
					}
				}
				if( $( p ).attr('tts:textAlign') ){
					if( !captionObj['css'] ){
						captionObj['css'] = {};
					}
					captionObj['css']['text-align'] = $( p ).attr('tts:textAlign');

					// If text align is "right" drop the css left offset:
					if( captionObj['css']['text-align'] == 'right' && captionObj['css']['left'] ){
						captionObj['css']['left'] = null;
					}
				}

				// check if this p has any style else use the body parent
				if( $( p ).attr('style') ){
					captionObj['styleId'] = $( p ).attr('style');
				} else {
					captionObj['styleId'] = bodyStyleId;
				}
				captions.push( captionObj );
			});
			return captions;
		},

		/**
		 * Convert a single TTML node ( and its children ) into an html string
		 *
		 * NOTE(review): text node content is returned as-is and ends up in html
		 * caption content; confirm callers sanitize untrusted caption files.
		 *
		 * @param {Object} node DOM node of the TTML document
		 * @return {String} html string, empty for metadata and childless elements
		 */
		convertTTML2HTML: function( node ){
			var _this = this;

			// look for text node:
			if( node.nodeType == 3 ){
				return node.textContent;
			}
			// skip metadata nodes:
			if( node.nodeName == 'metadata' ){
				return '';
			}
			// if a br just append
			if( node.nodeName == 'br' ){
				return '<br />';
			}
			// Childless elements carry no text; return an empty string so the
			// caller's string concatenation never appends "undefined".
			if( !node.childNodes.length ){
				return '';
			}

			// Setup tts mappings TODO should be static property of a ttmlSource object.
			var ttsStyleMap = {
				'tts:color' : 'color',
				'tts:fontWeight' : 'font-weight',
				'tts:fontStyle' : 'font-style'
			};
			var styleVal = '';
			for( var attr in ttsStyleMap ){
				if( node.getAttribute( attr ) ){
					styleVal += ttsStyleMap[ attr ] + ':' + node.getAttribute( attr ) + ';';
				}
			}
			var nodeString = '<' + node.nodeName + ' style="' + styleVal + '" >';
			$.each( node.childNodes, function( inx, childNode ){
				nodeString += _this.convertTTML2HTML( childNode );
			});
			nodeString += '</' + node.nodeName + '>';
			return nodeString;
		},

		/**
		 * srt timed text parse handle:
		 *
		 * @param {String} data Srt string to be parsed
		 * @return {Array} List of caption objects
		 */
		getCaptionsFromSrt: function ( data ){
			mw.log("TextSource::getCaptionsFromSrt");
			var _this = this;

			// Check if the "srt" parses as an XML
			try {
				var xml = $.parseXML( data );
				if( xml && $( xml ).find('parsererror').length == 0 ){
					return this.getCaptionsFromTMML( data );
				}
			} catch ( e ){
				// srt should not be xml, a parse failure here is the expected case
			}

			// Remove dos newlines
			var srt = data.replace(/\r+/g, '');
			// Trim white space start and end
			srt = srt.replace(/^\s+|\s+$/g, '');
			// Remove all html tags for security reasons
			srt = srt.replace(/<[a-zA-Z\/][^>]*>/g, '');

			// Get captions
			var captions = [];
			var caplist = srt.split('\n\n');
			for ( var i = 0; i < caplist.length; i++ ) {
				var s = caplist[ i ].split(/\n/);
				if ( s.length < 2 ) {
					// file format error or comment lines
					continue;
				}
				if ( !( s[0].match(/^\d+$/) && s[1].match(/\d+:\d+:\d+/) ) ) {
					// file format error or comment lines
					continue;
				}
				// ignore caption number in s[0]
				// parse time string
				var m = s[1].match(/(\d+):(\d+):(\d+)(?:,(\d+))?\s*--?>\s*(\d+):(\d+):(\d+)(?:,(\d+))?/);
				if ( !m ) {
					// Unrecognized timestring
					continue;
				}
				var caption = _this.match2caption( m );
				// concatenate text lines to html text
				caption['content'] = s.slice(2).join("<br />");
				// Add the current caption to the captions set:
				captions.push( caption );
			}
			return captions;
		},

		/**
		 * Get srts from a mediawiki html / srt string
		 *
		 * Right now wiki -> html is not always friendly to our srt parsing.
		 * The long term plan is to move the srt parsing to server side and have the api
		 * server up the srt's times in JSON form
		 *
		 * Also see https://bugzilla.wikimedia.org/show_bug.cgi?id=29126
		 *
		 * TODO move to mediaWiki specific module.
		 *
		 * @param {String} data Html output of the wikitext parser for the srt page
		 * @return {Array} List of caption objects
		 */
		getCaptiosnFromMediaWikiSrt: function( data ){
			mw.log("TimedText::getCaptiosnFromMediaWikiSrt:");
			var _this = this;
			var captions = [];
			var curentCap = {
				'content': ''
			};
			// Matches a lone line break, in either serialization
			var brOnly = /^<br\s*\/?>$/i;

			// Note this string concatenation and html error wrapping sometimes causes
			// parse issues where the wikitext includes many native <p> tags without child
			// subtitles. In practice this is not a deal breaker because the wikitext for
			// TimedText namespace and associated srts already has a specific format.
			// Long term we will move to server side parsing.
			$( '<div>' + data + '</div>' ).find('p').each( function() {
				var currentPtext = $(this).html();

				// We translate raw wikitext generated html into a matched srt time sample.
				// The raw html looks like:
				// #
				// hh:mm:ss,ms --&gt; hh:mm:ss,ms
				// text
				//
				// You can read more about the srt format here:
				// http://en.wikipedia.org/wiki/SubRip
				//
				// We attempt to be fairly robust in our regular expression to catch a few
				// srt variations such as omission of commas and empty text lines.
				var m = currentPtext
					.replace('--&gt;', '-->') // restore --&gt; with --> for easier srt parsing:
					.match(/\d+\s([\d\-]+):([\d\-]+):([\d\-]+)(?:,([\d\-]+))?\s*--?>\s*([\d\-]+):([\d\-]+):([\d\-]+)(?:,([\d\-]+))?\n?(.*)/);

				if ( m ) {
					captions.push(
						_this.match2caption( m )
					);
					return true;
				}

				/***
				 * Handle multi line style output
				 *
				 * Handles parse cases where an entire line can't be parsed in the single
				 * regular expression above, since the different caption parts are output in
				 * different <p> tags by the wikitext parser output.
				 */

				// Check if we have reached the end of a multi line match
				// ( loose equality on purpose: number compared against its source string )
				if( parseInt( currentPtext, 10 ) == currentPtext ) {
					if( curentCap.content != '' ) {
						captions.push( curentCap );
					}
					// Clear out the current caption content
					curentCap = {
						'content': ''
					};
					return true;
				}

				// Check only for time match:
				var timeMatch = currentPtext
					.replace('--&gt;', '-->')
					.match(/(\d+):(\d+):(\d+)(?:,(\d+))?\s*--?>\s*(\d+):(\d+):(\d+)(?:,(\d+))?/);
				if ( timeMatch ) {
					// Update the currentCap:
					curentCap = _this.match2caption( timeMatch );
					// match2caption only sets content when the match captured some,
					// make sure later appends start from an empty string:
					if( typeof curentCap.content === 'undefined' ){
						curentCap.content = '';
					}
					return true;
				}

				// Else append content for the curentCap
				if( !brOnly.test( currentPtext ) ) {
					curentCap['content'] += currentPtext;
				}
			});

			// Push last subtitle ( only when it actually collected content ):
			if( curentCap.content != '' ) {
				captions.push( curentCap );
			}
			mw.log( "TimedText::getCaptiosnFromMediaWikiSrt found " + captions.length + ' captions');
			return captions;
		},

		/**
		 * Takes a regular expression match and converts it to a caption object
		 *
		 * @param {Array} m Match with groups 1-4 start ( h, m, s, ms ), 5-8 end ( h, m, s, ms )
		 *  and optional group 9 text content
		 * @return {Object} caption with "start", "end" and ( if matched ) "content"
		 */
		match2caption: function( m ){
			var caption = {};
			// Look for ms:
			var startMs = ( m[4] ) ? parseInt( m[4], 10 ) : 0;
			var endMs = ( m[8] ) ? parseInt( m[8], 10 ) : 0;
			caption['start'] = this.timeParts2seconds( m[1], m[2], m[3], startMs );
			caption['end'] = this.timeParts2seconds( m[5], m[6], m[7], endMs );
			if( m[9] ){
				caption['content'] = $.trim( m[9] );
			}
			return caption;
		},

		/**
		 * Takes time parts in hours, min, seconds and milliseconds and converts to float seconds.
		 *
		 * @param {String|Number} hours
		 * @param {String|Number} min
		 * @param {String|Number} sec
		 * @param {String|Number} ms
		 * @return {Number} result of mw.measurements2seconds for the given parts
		 */
		timeParts2seconds: function( hours, min, sec, ms ){
			return mw.measurements2seconds({
				'hours': hours,
				'minutes': min,
				'seconds' : sec,
				'milliseconds': ms
			});
		}
	};

} )( mediaWiki, jQuery );