caption-parser.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488
  1. /**
  2. * mux.js
  3. *
  4. * Copyright (c) Brightcove
  5. * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
  6. *
  7. * Reads in-band CEA-708 captions out of FMP4 segments.
  8. * @see https://en.wikipedia.org/wiki/CEA-708
  9. */
  10. 'use strict';
  11. var discardEmulationPreventionBytes = require('../tools/caption-packet-parser').discardEmulationPreventionBytes;
  12. var CaptionStream = require('../m2ts/caption-stream').CaptionStream;
  13. var findBox = require('../mp4/find-box.js');
  14. var parseTfdt = require('../tools/parse-tfdt.js');
  15. var parseTrun = require('../tools/parse-trun.js');
  16. var parseTfhd = require('../tools/parse-tfhd.js');
  17. var window = require('global/window');
  /**
   * Locates the sample that contains a given byte offset of the mdat by
   * walking the samples in order and consuming each sample's size.
   * Assumes that `parseSamples` has been called first.
   *
   * @param {Number} offset - The offset into the mdat
   * @param {Object[]} samples - An array of samples, parsed using `parseSamples`
   * @return {?Object} The matching sample, or null if no match was found.
   *
   * @see ISO-BMFF-12/2015, Section 8.8.8
   **/
  var mapToSample = function(offset, samples) {
    var remaining = offset;
    var index = 0;
    var candidate;

    while (index < samples.length) {
      candidate = samples[index];

      // The offset falls inside this sample once fewer bytes remain
      // than the sample itself occupies.
      if (remaining < candidate.size) {
        return candidate;
      }

      remaining -= candidate.size;
      index += 1;
    }

    return null;
  };
  /**
   * Scans a Media Data Box for SEI nal units (nal_unit_type 6).
   * The mdat is read as a sequence of NAL units, each preceded by a
   * 4-byte big-endian length.
   * Assumes that `parseSamples` has been called first.
   *
   * @param {Uint8Array} avcStream - The bytes of the mdat
   * @param {Object[]} samples - The samples parsed out by `parseSamples`
   * @param {Number} trackId - The trackId of this video track
   * @return {Object} result
   * @return {Object[]} result.seiNals - the SEI NALUs found. Each entry carries
   * nalUnitType, size, data, escapedRBSP, trackId, pts and dts, which is
   * what CaptionStream.push is expected to consume.
   * @return {Object[]} result.logs - `{level, message}` entries for SEI NALUs
   * that could not be given a timestamp
   *
   * @see ISO-BMFF-12/2015, Section 8.1.1
   * @see Rec. ITU-T H.264, 7.3.2.3.1
   **/
  var findSeiNals = function(avcStream, samples, trackId) {
    var view = new DataView(avcStream.buffer, avcStream.byteOffset, avcStream.byteLength);
    var logs = [];
    var seiNals = [];
    var cursor = 0;
    var timingSample;
    var previousSample;
    var nalLength;
    var nalStart;
    var payload;
    var nal;

    while (cursor + 4 < avcStream.length) {
      nalLength = view.getUint32(cursor);
      nalStart = cursor + 4;
      // Always step past the length prefix plus the announced length
      cursor = nalStart + nalLength;

      // A zero length is skipped (not treated as fatal), as is anything
      // that is not an SEI nal unit.
      if (nalLength <= 0 || (avcStream[nalStart] & 0x1F) !== 0x06) {
        continue;
      }

      // NOTE(review): the slice ends at nalStart + 1 + nalLength, which is one
      // byte past the unit if nalLength already counts the header byte - confirm.
      payload = avcStream.subarray(nalStart + 1, nalStart + 1 + nalLength);
      timingSample = mapToSample(nalStart, samples);

      nal = {
        nalUnitType: 'sei_rbsp',
        size: nalLength,
        data: payload,
        escapedRBSP: discardEmulationPreventionBytes(payload),
        trackId: trackId
      };

      // If a matching sample cannot be found, fall back to the last
      // matched sample as its values should be as close as possible
      timingSample = timingSample || previousSample;

      if (!timingSample) {
        // Nothing to take timestamps from: report it and drop this nal
        logs.push({
          level: 'warn',
          message: 'We\'ve encountered a nal unit without data at ' + nalStart + ' for trackId ' + trackId + '. See mux.js#223.'
        });
        continue;
      }

      nal.pts = timingSample.pts;
      nal.dts = timingSample.dts;
      previousSample = timingSample;
      seiNals.push(nal);
    }

    return {
      logs: logs,
      seiNals: seiNals
    };
  };
  /**
   * Parses the samples out of every Track Run Box and stamps each one
   * with its trackId and its absolute decode and presentation timestamps.
   * Samples are modified in place; durations and sizes missing from the
   * trun fall back to the defaults from the tfhd (or 0).
   *
   * @param {Array<Uint8Array>} truns - The Track Run boxes to be parsed
   * @param {Number|BigInt} baseMediaDecodeTime - base media decode time from tfdt
   * @see ISO-BMFF-12/2015, Section 8.8.12
   * @param {Object} tfhd - The parsed Track Fragment Header
   * @see inspect.parseTfhd
   * @return {Object[]} the parsed samples of all truns, in order
   *
   * @see ISO-BMFF-12/2015, Section 8.8.8
   **/
  var parseSamples = function(truns, baseMediaDecodeTime, tfhd) {
    var fallbackDuration = tfhd.defaultSampleDuration || 0;
    var fallbackSize = tfhd.defaultSampleSize || 0;
    var owningTrackId = tfhd.trackId;
    // Running decode timestamp, carried across samples and across truns
    var decodeClock = baseMediaDecodeTime;

    var stampSample = function(sample) {
      // A BigInt clock cannot be mixed with Numbers, so offsets and
      // durations are converted before they are added to it.
      var clockIsBigInt = typeof decodeClock === 'bigint';

      if (sample.duration === undefined) {
        sample.duration = fallbackDuration;
      }
      if (sample.size === undefined) {
        sample.size = fallbackSize;
      }

      sample.trackId = owningTrackId;
      sample.dts = decodeClock;

      if (sample.compositionTimeOffset === undefined) {
        sample.compositionTimeOffset = 0;
      }

      sample.pts = decodeClock + (clockIsBigInt ?
        window.BigInt(sample.compositionTimeOffset) :
        sample.compositionTimeOffset);
      decodeClock += clockIsBigInt ? window.BigInt(sample.duration) : sample.duration;
    };

    // Note: We currently do not parse the sample table as well
    // as the trun. It's possible some sources will require this.
    // moov > trak > mdia > minf > stbl
    return truns.reduce(function(collected, trun) {
      var runSamples = parseTrun(trun).samples;

      runSamples.forEach(stampSample);
      return collected.concat(runSamples);
    }, []);
  };
  /**
   * Parses out caption nals from an FMP4 segment's video tracks.
   *
   * Each mdat is paired with the traf found at the same index. An mdat
   * without a traf at its index, or a traf without a tfhd, is skipped.
   *
   * @param {Uint8Array} segment - The bytes of a single segment
   * @param {Number} videoTrackId - The trackId of a video track in the segment
   * @return {Object.<Number, Object>} A mapping of video trackId to
   * `{seiNals: Object[], logs: Object[]}` collected for that track. Only
   * `videoTrackId` can appear as a key; the map is empty when nothing matched.
   **/
  var parseCaptionNals = function(segment, videoTrackId) {
    // To get the samples
    var trafs = findBox(segment, ['moof', 'traf']);
    // To get SEI NAL units
    var mdats = findBox(segment, ['mdat']);
    var captionNals = {};

    // Pair up each mdat with a traf as moofs and mdats are in pairs
    mdats.forEach(function(mdat, index) {
      var traf = trafs[index];
      var tfhd;
      var headerInfo;
      var trackId;
      var tfdt;
      var baseMediaDecodeTime;
      var truns;
      var samples;
      var result;

      // More mdats than trafs: there is no header to describe this mdat
      if (!traf) {
        return;
      }

      tfhd = findBox(traf, ['tfhd']);

      // Exactly 1 tfhd per traf is expected; without it the track is unknown
      if (!tfhd || tfhd.length === 0) {
        return;
      }

      headerInfo = parseTfhd(tfhd[0]);
      trackId = headerInfo.trackId;

      // Only parse video data for the chosen video track
      if (videoTrackId !== trackId) {
        return;
      }

      truns = findBox(traf, ['trun']);

      if (truns.length === 0) {
        return;
      }

      tfdt = findBox(traf, ['tfdt']);
      // Either 0 or 1 tfdt per traf
      baseMediaDecodeTime = (tfdt.length > 0) ? parseTfdt(tfdt[0]).baseMediaDecodeTime : 0;

      samples = parseSamples(truns, baseMediaDecodeTime, headerInfo);
      result = findSeiNals(mdat, samples, trackId);

      if (!captionNals[trackId]) {
        captionNals[trackId] = {seiNals: [], logs: []};
      }

      captionNals[trackId].seiNals = captionNals[trackId].seiNals.concat(result.seiNals);
      captionNals[trackId].logs = captionNals[trackId].logs.concat(result.logs);
    });

    return captionNals;
  };
  /**
   * Collects the caption SEI NALUs of one video track of an fmp4 segment,
   * together with any logs produced while finding them and the timescale
   * needed to convert their timestamps.
   * Assumes that `probe.getVideoTrackIds` and `probe.timescale` have been called first
   *
   * @param {Uint8Array} segment - The fmp4 segment containing embedded captions
   * @param {Number} trackId - The id of the video track to parse
   * @param {Number} timescale - The timescale for the video track from the init segment
   *
   * @return {?Object} parsedData - null if trackId is null
   * @return {Object[]} [parsedData.seiNals] - The SEI NALUs found for the track,
   * undefined when `parseCaptionNals` returned nothing for it
   * @return {Object[]} [parsedData.logs] - Logs collected for the track,
   * undefined when `parseCaptionNals` returned nothing for it
   * @return {Number} parsedData.timescale - The timescale that was passed in
   **/
  var parseEmbeddedCaptions = function(segment, trackId, timescale) {
    // the ISO-BMFF spec says that trackId can't be zero, but there's some broken
    // content out there, so only null is treated as "no track"
    if (trackId === null) {
      return null;
    }

    var nalsByTrack = parseCaptionNals(segment, trackId);
    var nalsForTrack = nalsByTrack[trackId] || {};

    return {
      seiNals: nalsForTrack.seiNals,
      logs: nalsForTrack.logs,
      timescale: timescale
    };
  };
  /**
   * Converts SEI NALUs into captions that can be used by video.js
   *
   * A parser starts out reset but uninitialized: `init` must be called
   * before `parse` produces anything.
   **/
  var CaptionParser = function() {
    var isInitialized = false;
    var captionStream;

    // Stores segments seen before trackId and timescale are set
    var segmentCache;
    // Stores video track ID of the track being parsed
    var trackId;
    // Stores the timescale of the track being parsed
    var timescale;
    // Stores captions parsed so far
    var parsedCaptions;
    // Stores whether we are receiving partial data or not
    var parsingPartial;

    /**
     * A method to indicate whether a CaptionParser has been initialized
     * @returns {Boolean}
     **/
    this.isInitialized = function() {
      return isInitialized;
    };

    /**
     * Initializes the underlying CaptionStream, SEI NAL parsing
     * and management, and caption collection
     *
     * @param {Object} [options]
     * @param {Boolean} [options.isPartial] - when truthy, `flushStream` uses
     * `partialFlush` on the CaptionStream instead of `flush`
     **/
    this.init = function(options) {
      captionStream = new CaptionStream();
      isInitialized = true;
      parsingPartial = options ? options.isPartial : false;

      // Collect dispatched captions
      captionStream.on('data', function(event) {
        // Convert to seconds in the source's timescale
        event.startTime = event.startPts / timescale;
        event.endTime = event.endPts / timescale;

        parsedCaptions.captions.push(event);
        parsedCaptions.captionStreams[event.stream] = true;
      });

      captionStream.on('log', function(log) {
        parsedCaptions.logs.push(log);
      });
    };

    /**
     * Determines if a new video track will be selected
     * or if the timescale changed
     *
     * @param {Number[]} videoTrackIds - A list of video tracks found in the init segment
     * @param {Object.<Number, Number>} timescales - The timescales found in the init segment
     * @return {Boolean} false when either argument is missing or empty,
     * otherwise whether the first video track or its timescale differs
     * from what is currently being parsed
     **/
    this.isNewInit = function(videoTrackIds, timescales) {
      // Without both pieces of init information there is nothing to switch to
      if (!videoTrackIds || !timescales) {
        return false;
      }

      if (videoTrackIds.length === 0 ||
          (typeof timescales === 'object' && Object.keys(timescales).length === 0)) {
        return false;
      }

      return trackId !== videoTrackIds[0] ||
        timescale !== timescales[trackId];
    };

    /**
     * Parses out SEI captions and interacts with underlying
     * CaptionStream to return dispatched captions
     *
     * @param {Uint8Array} segment - The fmp4 segment containing embedded captions
     * @param {Number[]} videoTrackIds - A list of video tracks found in the init segment
     * @param {Object.<Number, Number>} timescales - The timescales found in the init segment
     * @return {?Object} null when the parser is not initialized, when track ids
     * or timescales are missing, when the segment had to be cached, or when
     * nothing was found; an object with `logs` and empty `captions` and
     * `captionStreams` when only logs were collected; otherwise the
     * accumulated `{captions, captionStreams, logs}`
     * @see parseEmbeddedCaptions
     * @see m2ts/caption-stream.js
     **/
    this.parse = function(segment, videoTrackIds, timescales) {
      var parsedData;
      var cachedSegment;

      if (!this.isInitialized()) {
        return null;
      }

      // This is not likely to be a video segment
      if (!videoTrackIds || !timescales) {
        return null;
      }

      if (this.isNewInit(videoTrackIds, timescales)) {
        // Use the first video track only as there is no
        // mechanism to switch to other video tracks
        trackId = videoTrackIds[0];
        timescale = timescales[trackId];
      } else if (trackId === null || !timescale) {
        // If an init segment has not been seen yet, hold onto segment
        // data until we have one.
        // the ISO-BMFF spec says that trackId can't be zero, but there's some broken content out there
        segmentCache.push(segment);
        return null;
      }

      // Now that a timescale and trackId is set, parse cached segments
      while (segmentCache.length > 0) {
        cachedSegment = segmentCache.shift();
        this.parse(cachedSegment, videoTrackIds, timescales);
      }

      parsedData = parseEmbeddedCaptions(segment, trackId, timescale);

      if (parsedData && parsedData.logs) {
        parsedCaptions.logs = parsedCaptions.logs.concat(parsedData.logs);
      }

      if (parsedData === null || !parsedData.seiNals) {
        if (parsedCaptions.logs.length) {
          // NOTE(review): captionStreams is an array here but an object
          // everywhere else - left as is in case callers rely on it.
          return {logs: parsedCaptions.logs, captions: [], captionStreams: []};
        }

        return null;
      }

      this.pushNals(parsedData.seiNals);
      // Force the parsed captions to be dispatched
      this.flushStream();

      return parsedCaptions;
    };

    /**
     * Pushes SEI NALUs onto CaptionStream
     * @param {Object[]} nals - A list of SEI nals parsed using `parseCaptionNals`
     * Assumes that `parseCaptionNals` has been called first
     * @see m2ts/caption-stream.js
     **/
    this.pushNals = function(nals) {
      if (!this.isInitialized() || !nals || nals.length === 0) {
        return null;
      }

      nals.forEach(function(nal) {
        captionStream.push(nal);
      });
    };

    /**
     * Flushes underlying CaptionStream to dispatch processed, displayable captions
     * @see m2ts/caption-stream.js
     **/
    this.flushStream = function() {
      if (!this.isInitialized()) {
        return null;
      }

      if (!parsingPartial) {
        captionStream.flush();
      } else {
        captionStream.partialFlush();
      }
    };

    /**
     * Reset caption buckets for new data
     **/
    this.clearParsedCaptions = function() {
      parsedCaptions.captions = [];
      parsedCaptions.captionStreams = {};
      parsedCaptions.logs = [];
    };

    /**
     * Resets underlying CaptionStream
     * @see m2ts/caption-stream.js
     **/
    this.resetCaptionStream = function() {
      if (!this.isInitialized()) {
        return null;
      }

      captionStream.reset();
    };

    /**
     * Convenience method to clear all captions flushed from the
     * CaptionStream and still being parsed
     * @see m2ts/caption-stream.js
     **/
    this.clearAllCaptions = function() {
      this.clearParsedCaptions();
      this.resetCaptionStream();
    };

    /**
     * Reset caption parser: forgets cached segments, the selected track
     * and its timescale, and empties the caption buckets
     **/
    this.reset = function() {
      segmentCache = [];
      trackId = null;
      timescale = null;

      if (!parsedCaptions) {
        parsedCaptions = {
          captions: [],
          // CC1, CC2, CC3, CC4
          captionStreams: {},
          logs: []
        };
      } else {
        this.clearParsedCaptions();
      }

      this.resetCaptionStream();
    };

    this.reset();
  };
  419. module.exports = CaptionParser;