caption-parser.js 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
  1. /**
  2. * mux.js
  3. *
  4. * Copyright (c) Brightcove
  5. * Licensed Apache-2.0 https://github.com/videojs/mux.js/blob/master/LICENSE
  6. *
  7. * Reads in-band CEA-708 captions out of FMP4 segments.
  8. * @see https://en.wikipedia.org/wiki/CEA-708
  9. */
  10. 'use strict';
  11. var discardEmulationPreventionBytes = require('../tools/caption-packet-parser').discardEmulationPreventionBytes;
  12. var CaptionStream = require('../m2ts/caption-stream').CaptionStream;
  13. var findBox = require('../mp4/find-box.js');
  14. var parseTfdt = require('../tools/parse-tfdt.js');
  15. var parseTrun = require('../tools/parse-trun.js');
  16. var parseTfhd = require('../tools/parse-tfhd.js');
  17. var window = require('global/window');
  /**
   * Finds the sample that contains a given byte offset of the mdat by walking
   * the samples in order and consuming each sample's size from the offset.
   * Assumes that `parseSamples` has been called first.
   *
   * @param {Number} offset - The offset into the mdat
   * @param {Object[]} samples - An array of samples, parsed using `parseSamples`
   * @return {?Object} The first sample whose size exceeds the remaining offset,
   *   or null when the offset lies past the combined size of all samples.
   *
   * @see ISO-BMFF-12/2015, Section 8.8.8
   **/
  var mapToSample = function mapToSample(offset, samples) {
    var remaining = offset;
    var index = 0;

    while (index < samples.length) {
      var candidate = samples[index];

      if (remaining < candidate.size) {
        return candidate;
      }

      // The offset is beyond this sample; make it relative to the next one.
      remaining -= candidate.size;
      index += 1;
    }

    return null;
  };
  /**
   * Finds SEI nal units contained in a Media Data Box.
   * Assumes that `parseSamples` has been called first.
   *
   * The mdat is read as a sequence of units, each introduced by a 4-byte
   * length (read with DataView#getUint32, i.e. big-endian). Units whose first
   * byte has a NAL type (low five bits) of 0x06 are collected; every other
   * unit is skipped.
   *
   * @param {Uint8Array} avcStream - The bytes of the mdat
   * @param {Object[]} samples - The samples parsed out by `parseSamples`
   * @param {Number} trackId - The trackId of this video track
   * @return {Object} result
   * @return {Object[]} result.seiNals - the SEI NALUs found. The contents of
   *   each seiNal should match what is expected by CaptionStream.push
   *   (nalUnitType, size, data, escapedRBSP, pts, dts)
   * @return {Object[]} result.logs - `{level, message}` entries for SEI units
   *   that could not be given a timestamp
   *
   * @see ISO-BMFF-12/2015, Section 8.1.1
   * @see Rec. ITU-T H.264, 7.3.2.3.1
   **/
  var findSeiNals = function findSeiNals(avcStream, samples, trackId) {
    var view = new DataView(avcStream.buffer, avcStream.byteOffset, avcStream.byteLength);
    var found = {
      logs: [],
      seiNals: []
    };
    var previousSample;
    var cursor = 0;

    while (cursor + 4 < avcStream.length) {
      var nalLength = view.getUint32(cursor);
      var nalStart = cursor + 4;

      // Position the cursor on the next length prefix before deciding whether
      // this unit is interesting, so every `continue` below advances correctly.
      cursor = nalStart + nalLength;

      // Zero-length units and anything that is not an SEI NAL are skipped
      if (nalLength <= 0 || (avcStream[nalStart] & 0x1F) !== 0x06) {
        continue;
      }

      // NOTE(review): if the length prefix counts the NAL header byte, this
      // slice reaches one byte past the unit — confirm this is intended.
      var payload = avcStream.subarray(nalStart + 1, nalStart + 1 + nalLength);
      var located = mapToSample(nalStart, samples);
      var nal = {
        nalUnitType: 'sei_rbsp',
        size: nalLength,
        data: payload,
        escapedRBSP: discardEmulationPreventionBytes(payload),
        trackId: trackId
      };

      // If a matching sample cannot be found, use the last matched
      // sample's values as they should be as close as possible
      var timing = located || previousSample;

      if (!timing) {
        found.logs.push({
          level: 'warn',
          message: 'We\'ve encountered a nal unit without data at ' + nalStart + ' for trackId ' + trackId + '. See mux.js#223.'
        });
        continue;
      }

      nal.pts = timing.pts;
      nal.dts = timing.dts;
      previousSample = timing;
      found.seiNals.push(nal);
    }

    return found;
  };
  /**
   * Parses sample information out of Track Run Boxes and calculates
   * the absolute presentation and decode timestamps of each sample.
   *
   * Each sample object returned by `parseTrun` is completed in place:
   * missing `duration` / `size` fall back to the tfhd defaults (or 0), a
   * missing `compositionTimeOffset` becomes 0, and `trackId`, `dts` and `pts`
   * are assigned. The decode time keeps running across all truns.
   *
   * @param {Array<Uint8Array>} truns - The Trun Run boxes to be parsed
   * @param {Number|BigInt} baseMediaDecodeTime - base media decode time from tfdt
   * @see ISO-BMFF-12/2015, Section 8.8.12
   * @param {Object} tfhd - The parsed Track Fragment Header
   * @see inspect.parseTfhd
   * @return {Object[]} the parsed samples of every trun, in order
   *
   * @see ISO-BMFF-12/2015, Section 8.8.8
   **/
  var parseSamples = function parseSamples(truns, baseMediaDecodeTime, tfhd) {
    var fallbackDuration = tfhd.defaultSampleDuration || 0;
    var fallbackSize = tfhd.defaultSampleSize || 0;
    var trackId = tfhd.trackId;
    var decodeTime = baseMediaDecodeTime;

    // Converts a per-sample value to a BigInt while the running decode time
    // is a BigInt, so the two can be added together.
    var matchDecodeTimeType = function (value) {
      return typeof decodeTime === 'bigint' ? window.BigInt(value) : value;
    };

    return truns.reduce(function (collected, trun) {
      // Note: We currently do not parse the sample table as well
      // as the trun. It's possible some sources will require this.
      // moov > trak > mdia > minf > stbl
      var runSamples = parseTrun(trun).samples;

      runSamples.forEach(function (sample) {
        if (sample.duration === undefined) {
          sample.duration = fallbackDuration;
        }

        if (sample.size === undefined) {
          sample.size = fallbackSize;
        }

        sample.trackId = trackId;
        sample.dts = decodeTime;

        if (sample.compositionTimeOffset === undefined) {
          sample.compositionTimeOffset = 0;
        }

        sample.pts = decodeTime + matchDecodeTimeType(sample.compositionTimeOffset);
        decodeTime += matchDecodeTimeType(sample.duration);
      });

      return collected.concat(runSamples);
    }, []);
  };
  /**
   * Parses out caption nals from an FMP4 segment's video tracks.
   *
   * Boxes are paired by position: the n-th `mdat` is matched with the n-th
   * `moof > traf`. A pair is skipped when there is no traf at that position,
   * when the traf has no `tfhd`, when the traf belongs to a different track
   * than `videoTrackId`, or when it has no `trun`.
   *
   * @param {Uint8Array} segment - The bytes of a single segment
   * @param {Number} videoTrackId - The trackId of a video track in the segment
   * @return {Object.<Number, Object>} A mapping of video trackId to an object
   *   with `seiNals` (the SEI NALs found in that track) and `logs` (the log
   *   entries produced while looking for them). Tracks that yielded no
   *   usable mdat/traf pair have no entry.
   **/
  var parseCaptionNals = function parseCaptionNals(segment, videoTrackId) {
    // To get the samples
    var trafs = findBox(segment, ['moof', 'traf']);
    // To get SEI NAL units
    var mdats = findBox(segment, ['mdat']);
    var captionNals = {};

    // Pair up each traf with a mdat as moofs and mdats are in pairs
    mdats.forEach(function (mdat, index) {
      var traf = trafs[index];

      // More mdats than trafs: there is no sample information for this mdat
      if (!traf) {
        return;
      }

      // Exactly 1 tfhd per traf is expected; without it the track is unknown
      var tfhd = findBox(traf, ['tfhd']);

      if (tfhd.length === 0) {
        return;
      }

      var headerInfo = parseTfhd(tfhd[0]);
      var trackId = headerInfo.trackId;

      // Only parse video data for the chosen video track
      if (videoTrackId !== trackId) {
        return;
      }

      var truns = findBox(traf, ['trun']);

      if (truns.length === 0) {
        return;
      }

      // Either 0 or 1 tfdt per traf
      var tfdt = findBox(traf, ['tfdt']);
      var baseMediaDecodeTime = tfdt.length > 0 ? parseTfdt(tfdt[0]).baseMediaDecodeTime : 0;
      var samples = parseSamples(truns, baseMediaDecodeTime, headerInfo);
      var result = findSeiNals(mdat, samples, trackId);

      if (!captionNals[trackId]) {
        captionNals[trackId] = {
          seiNals: [],
          logs: []
        };
      }

      captionNals[trackId].seiNals = captionNals[trackId].seiNals.concat(result.seiNals);
      captionNals[trackId].logs = captionNals[trackId].logs.concat(result.logs);
    });

    return captionNals;
  };
  /**
   * Collects the SEI NALs carrying in-band captions for one video track of an
   * fmp4 segment, together with the timescale needed to convert their
   * timestamps. Turning the NALs into caption cues is left to the caller.
   * Assumes that `probe.getVideoTrackIds` and `probe.timescale` have been called first
   *
   * @param {Uint8Array} segment - The fmp4 segment containing embedded captions
   * @param {Number} trackId - The id of the video track to parse
   * @param {Number} timescale - The timescale for the video track from the init segment
   *
   * @return {?Object} parsed - null when `trackId` is null, otherwise:
   * @return {Object[]|undefined} parsed.seiNals - SEI NALs found for the track,
   *   undefined when `parseCaptionNals` returned nothing for it
   * @return {Object[]|undefined} parsed.logs - log entries for the track,
   *   undefined when `parseCaptionNals` returned nothing for it
   * @return {Number} parsed.timescale - the `timescale` argument, passed through
   **/
  var parseEmbeddedCaptions = function parseEmbeddedCaptions(segment, trackId, timescale) {
    // the ISO-BMFF spec says that trackId can't be zero, but there's some broken
    // content out there, so only an explicit null means "no track"
    if (trackId !== null) {
      var forTrack = parseCaptionNals(segment, trackId)[trackId];

      return {
        seiNals: forTrack ? forTrack.seiNals : undefined,
        logs: forTrack ? forTrack.logs : undefined,
        timescale: timescale
      };
    }

    return null;
  };
  /**
   * Converts SEI NALUs into captions that can be used by video.js
   *
   * A new CaptionParser starts out reset but not initialized: `init()` must be
   * called before `parse()` produces anything. Captions and logs accumulate in
   * a single result object across `parse()` calls until
   * `clearParsedCaptions()` (or `reset()`) is called.
   **/
  var CaptionParser = function CaptionParser() {
    var isInitialized = false;
    var captionStream;
    // Stores segments seen before trackId and timescale are set
    var segmentCache;
    // Stores video track ID of the track being parsed
    var trackId;
    // Stores the timescale of the track being parsed
    var timescale;
    // Stores captions parsed so far: { captions: [], captionStreams: {}, logs: [] }
    var parsedCaptions;
    // Stores whether we are receiving partial data or not
    var parsingPartial;

    /**
     * A method to indicate whether a CaptionParser has been initalized
     * @returns {Boolean}
     **/
    this.isInitialized = function () {
      return isInitialized;
    };

    /**
     * Initializes the underlying CaptionStream, SEI NAL parsing
     * and management, and caption collection
     *
     * @param {Object} [options]
     * @param {Boolean} [options.isPartial] - when truthy, `flushStream` uses
     *   the CaptionStream's `partialFlush` instead of `flush`
     **/
    this.init = function (options) {
      captionStream = new CaptionStream();
      isInitialized = true;
      parsingPartial = options ? options.isPartial : false;

      // Collect dispatched captions
      captionStream.on('data', function (event) {
        // Convert to seconds in the source's timescale
        event.startTime = event.startPts / timescale;
        event.endTime = event.endPts / timescale;
        parsedCaptions.captions.push(event);
        parsedCaptions.captionStreams[event.stream] = true;
      });
      captionStream.on('log', function (log) {
        parsedCaptions.logs.push(log);
      });
    };

    /**
     * Determines if a new video track will be selected
     * or if the timescale changed
     *
     * @param {Number[]} videoTrackIds - A list of video tracks found in the init segment
     * @param {Object.<Number, Number>} timescales - The timescales found in the init segment
     * @return {Boolean} false when either list is empty, otherwise whether the
     *   first video track or the current track's timescale differs from the
     *   values currently in use
     **/
    this.isNewInit = function (videoTrackIds, timescales) {
      if (videoTrackIds && videoTrackIds.length === 0 || timescales && typeof timescales === 'object' && Object.keys(timescales).length === 0) {
        return false;
      }

      return trackId !== videoTrackIds[0] || timescale !== timescales[trackId];
    };

    /**
     * Parses out SEI captions and interacts with underlying
     * CaptionStream to return dispatched captions
     *
     * @param {Uint8Array} segment - The fmp4 segment containing embedded captions
     * @param {Number[]} videoTrackIds - A list of video tracks found in the init segment
     * @param {Object.<Number, Number>} timescales - The timescales found in the init segment
     * @return {?Object} null when the parser is not initialized, when
     *   `videoTrackIds` or `timescales` is missing, when the segment was
     *   cached to wait for init data, or when nothing was found and no logs
     *   are pending. Otherwise an object with `captions` (array),
     *   `captionStreams` (object keyed by stream name) and `logs` (array).
     * @see parseEmbeddedCaptions
     * @see m2ts/caption-stream.js
     **/
    this.parse = function (segment, videoTrackIds, timescales) {
      var parsedData;

      if (!this.isInitialized()) {
        return null;
      } else if (!videoTrackIds || !timescales) {
        // This is not likely to be a video segment
        return null;
      } else if (this.isNewInit(videoTrackIds, timescales)) {
        // Use the first video track only as there is no
        // mechanism to switch to other video tracks
        trackId = videoTrackIds[0];
        timescale = timescales[trackId];
      } else if (trackId === null || !timescale) {
        // If an init segment has not been seen yet, hold onto segment
        // data until we have one.
        // the ISO-BMFF spec says that trackId can't be zero, but there's some broken content out there
        segmentCache.push(segment);
        return null;
      }

      // Now that a timescale and trackId is set, parse cached segments.
      // Their captions accumulate in `parsedCaptions`, so the return values
      // of these nested calls are not needed.
      while (segmentCache.length > 0) {
        var cachedSegment = segmentCache.shift();
        this.parse(cachedSegment, videoTrackIds, timescales);
      }

      parsedData = parseEmbeddedCaptions(segment, trackId, timescale);

      if (parsedData && parsedData.logs) {
        parsedCaptions.logs = parsedCaptions.logs.concat(parsedData.logs);
      }

      if (parsedData === null || !parsedData.seiNals) {
        if (parsedCaptions.logs.length) {
          return {
            logs: parsedCaptions.logs,
            captions: [],
            // An object map, matching the shape of `parsedCaptions.captionStreams`
            captionStreams: {}
          };
        }

        return null;
      }

      this.pushNals(parsedData.seiNals);
      // Force the parsed captions to be dispatched
      this.flushStream();
      return parsedCaptions;
    };

    /**
     * Pushes SEI NALUs onto CaptionStream
     * @param {Object[]} nals - A list of SEI nals parsed using `parseCaptionNals`
     * Assumes that `parseCaptionNals` has been called first
     * @return {null|undefined} null when there was nothing to do (parser not
     *   initialized or no nals), undefined otherwise
     * @see m2ts/caption-stream.js
     **/
    this.pushNals = function (nals) {
      if (!this.isInitialized() || !nals || nals.length === 0) {
        return null;
      }

      nals.forEach(function (nal) {
        captionStream.push(nal);
      });
    };

    /**
     * Flushes underlying CaptionStream to dispatch processed, displayable captions
     * @return {null|undefined} null when the parser is not initialized
     * @see m2ts/caption-stream.js
     **/
    this.flushStream = function () {
      if (!this.isInitialized()) {
        return null;
      }

      if (!parsingPartial) {
        captionStream.flush();
      } else {
        captionStream.partialFlush();
      }
    };

    /**
     * Reset caption buckets for new data
     **/
    this.clearParsedCaptions = function () {
      parsedCaptions.captions = [];
      parsedCaptions.captionStreams = {};
      parsedCaptions.logs = [];
    };

    /**
     * Resets underlying CaptionStream
     * @return {null|undefined} null when the parser is not initialized
     * @see m2ts/caption-stream.js
     **/
    this.resetCaptionStream = function () {
      if (!this.isInitialized()) {
        return null;
      }

      captionStream.reset();
    };

    /**
     * Convenience method to clear all captions flushed from the
     * CaptionStream and still being parsed
     * @see m2ts/caption-stream.js
     **/
    this.clearAllCaptions = function () {
      this.clearParsedCaptions();
      this.resetCaptionStream();
    };

    /**
     * Reset caption parser: drops cached segments, forgets the selected
     * track and timescale, empties the caption buckets and resets the
     * CaptionStream (when initialized).
     **/
    this.reset = function () {
      segmentCache = [];
      trackId = null;
      timescale = null;

      if (!parsedCaptions) {
        parsedCaptions = {
          captions: [],
          // CC1, CC2, CC3, CC4
          captionStreams: {},
          logs: []
        };
      } else {
        this.clearParsedCaptions();
      }

      this.resetCaptionStream();
    };

    this.reset();
  };
  405. module.exports = CaptionParser;