Source: lib/text/vtt_text_parser.js

  1. /*! @license
  2. * Shaka Player
  3. * Copyright 2016 Google LLC
  4. * SPDX-License-Identifier: Apache-2.0
  5. */
  6. goog.provide('shaka.text.VttTextParser');
  7. goog.require('goog.asserts');
  8. goog.require('shaka.log');
  9. goog.require('shaka.media.ManifestParser');
  10. goog.require('shaka.text.Cue');
  11. goog.require('shaka.text.CueRegion');
  12. goog.require('shaka.text.TextEngine');
  13. goog.require('shaka.util.Error');
  14. goog.require('shaka.util.StringUtils');
  15. goog.require('shaka.util.TextParser');
  16. goog.require('shaka.util.TXml');
  17. /**
  18. * @implements {shaka.extern.TextParser}
  19. * @export
  20. */
  21. shaka.text.VttTextParser = class {
  22. /** Constructs a VTT parser. */
  23. constructor() {
  24. /** @private {boolean} */
  25. this.sequenceMode_ = false;
  26. /** @private {string} */
  27. this.manifestType_ = shaka.media.ManifestParser.UNKNOWN;
  28. }
  29. /**
  30. * @override
  31. * @export
  32. */
  33. parseInit(data) {
  34. goog.asserts.assert(false, 'VTT does not have init segments');
  35. }
  36. /**
  37. * @override
  38. * @export
  39. */
  40. setSequenceMode(sequenceMode) {
  41. this.sequenceMode_ = sequenceMode;
  42. }
  43. /**
  44. * @override
  45. * @export
  46. */
  47. setManifestType(manifestType) {
  48. this.manifestType_ = manifestType;
  49. }
  50. /**
  51. * @override
  52. * @export
  53. */
  54. parseMedia(data, time) {
  55. const VttTextParser = shaka.text.VttTextParser;
  56. // Get the input as a string. Normalize newlines to \n.
  57. let str = shaka.util.StringUtils.fromUTF8(data);
  58. str = str.replace(/\r\n|\r(?=[^\n]|$)/gm, '\n');
  59. const blocks = str.split(/\n{2,}/m);
  60. if (!/^WEBVTT($|[ \t\n])/m.test(blocks[0])) {
  61. throw new shaka.util.Error(
  62. shaka.util.Error.Severity.CRITICAL,
  63. shaka.util.Error.Category.TEXT,
  64. shaka.util.Error.Code.INVALID_TEXT_HEADER);
  65. }
  66. // Depending on "segmentRelativeVttTiming" configuration,
  67. // "vttOffset" will correspond to either "periodStart" (default)
  68. // or "segmentStart", for segmented VTT where timings are relative
  69. // to the beginning of each segment.
  70. // NOTE: "periodStart" is the timestamp offset applied via TextEngine.
  71. // It is no longer closely tied to periods, but the name stuck around.
  72. // NOTE: This offset and the flag choosing its meaning have no effect on
  73. // HLS content, which should use X-TIMESTAMP-MAP and periodStart instead.
  74. let offset = time.vttOffset;
  75. // Only use 'X-TIMESTAMP-MAP' with HLS. This overrides offset above.
  76. if (blocks[0].includes('X-TIMESTAMP-MAP') &&
  77. this.manifestType_ == shaka.media.ManifestParser.HLS) {
  78. if (this.sequenceMode_) {
  79. // Compute a different, rollover-based offset for sequence mode.
  80. offset = this.computeHlsSequenceModeOffset_(blocks[0], time);
  81. } else {
  82. // Calculate the offset from the segment startTime.
  83. offset = time.segmentStart;
  84. }
  85. }
  86. // Parse VTT regions.
  87. /* !Array.<!shaka.text.CueRegion> */
  88. const regions = [];
  89. for (const line of blocks[0].split('\n')) {
  90. if (/^Region:/.test(line)) {
  91. const region = VttTextParser.parseRegion_(line);
  92. regions.push(region);
  93. }
  94. }
  95. /** @type {!Map.<string, shaka.text.Cue>} */
  96. const styles = new Map();
  97. VttTextParser.addDefaultTextColor_(styles);
  98. // Parse cues.
  99. const ret = [];
  100. for (const block of blocks.slice(1)) {
  101. const lines = block.split('\n');
  102. VttTextParser.parseStyle_(lines, styles);
  103. const cue = VttTextParser.parseCue_(lines, offset, regions, styles);
  104. if (cue) {
  105. ret.push(cue);
  106. }
  107. }
  108. return ret;
  109. }
  110. /**
  111. * @param {string} headerBlock Contains X-TIMESTAMP-MAP.
  112. * @param {shaka.extern.TextParser.TimeContext} time
  113. * @return {number}
  114. * @private
  115. */
  116. computeHlsSequenceModeOffset_(headerBlock, time) {
  117. // https://bit.ly/2K92l7y
  118. // The 'X-TIMESTAMP-MAP' header is used in HLS to align text with
  119. // the rest of the media.
  120. // The header format is 'X-TIMESTAMP-MAP=MPEGTS:n,LOCAL:m'
  121. // (the attributes can go in any order)
  122. // where n is MPEG-2 time and m is cue time it maps to.
  123. // For example 'X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:900000'
  124. // means an offset of 10 seconds
  125. // 900000/MPEG_TIMESCALE - cue time.
  126. const cueTimeMatch = headerBlock.match(
  127. /LOCAL:((?:(\d{1,}):)?(\d{2}):(\d{2})\.(\d{3}))/m);
  128. const mpegTimeMatch = headerBlock.match(/MPEGTS:(\d+)/m);
  129. if (!cueTimeMatch || !mpegTimeMatch) {
  130. throw new shaka.util.Error(
  131. shaka.util.Error.Severity.CRITICAL,
  132. shaka.util.Error.Category.TEXT,
  133. shaka.util.Error.Code.INVALID_TEXT_HEADER);
  134. }
  135. const parser = new shaka.util.TextParser(cueTimeMatch[1]);
  136. const cueTime = shaka.text.VttTextParser.parseTime_(parser);
  137. if (cueTime == null) {
  138. throw new shaka.util.Error(
  139. shaka.util.Error.Severity.CRITICAL,
  140. shaka.util.Error.Category.TEXT,
  141. shaka.util.Error.Code.INVALID_TEXT_HEADER);
  142. }
  143. let mpegTime = Number(mpegTimeMatch[1]);
  144. const mpegTimescale = shaka.text.VttTextParser.MPEG_TIMESCALE_;
  145. const rolloverSeconds =
  146. shaka.text.VttTextParser.TS_ROLLOVER_ / mpegTimescale;
  147. let segmentStart = time.segmentStart - time.periodStart;
  148. while (segmentStart >= rolloverSeconds) {
  149. segmentStart -= rolloverSeconds;
  150. mpegTime += shaka.text.VttTextParser.TS_ROLLOVER_;
  151. }
  152. return time.periodStart + mpegTime / mpegTimescale - cueTime;
  153. }
  154. /**
  155. * Add default color
  156. *
  157. * @param {!Map.<string, shaka.text.Cue>} styles
  158. * @private
  159. */
  160. static addDefaultTextColor_(styles) {
  161. const textColor = shaka.text.Cue.defaultTextColor;
  162. for (const [key, value] of Object.entries(textColor)) {
  163. const cue = new shaka.text.Cue(0, 0, '');
  164. cue.color = value;
  165. styles.set('.' + key, cue);
  166. }
  167. const bgColor = shaka.text.Cue.defaultTextBackgroundColor;
  168. for (const [key, value] of Object.entries(bgColor)) {
  169. const cue = new shaka.text.Cue(0, 0, '');
  170. cue.backgroundColor = value;
  171. styles.set('.' + key, cue);
  172. }
  173. }
  174. /**
  175. * Parses a string into a Region object.
  176. *
  177. * @param {string} text
  178. * @return {!shaka.text.CueRegion}
  179. * @private
  180. */
  181. static parseRegion_(text) {
  182. const VttTextParser = shaka.text.VttTextParser;
  183. const parser = new shaka.util.TextParser(text);
  184. // The region string looks like this:
  185. // Region: id=fred width=50% lines=3 regionanchor=0%,100%
  186. // viewportanchor=10%,90% scroll=up
  187. const region = new shaka.text.CueRegion();
  188. // Skip 'Region:'
  189. parser.readWord();
  190. parser.skipWhitespace();
  191. let word = parser.readWord();
  192. while (word) {
  193. if (!VttTextParser.parseRegionSetting_(region, word)) {
  194. shaka.log.warning(
  195. 'VTT parser encountered an invalid VTTRegion setting: ', word,
  196. ' The setting will be ignored.');
  197. }
  198. parser.skipWhitespace();
  199. word = parser.readWord();
  200. }
  201. return region;
  202. }
  203. /**
  204. * Parses a style block into a Cue object.
  205. *
  206. * @param {!Array.<string>} text
  207. * @param {!Map.<string, shaka.text.Cue>} styles
  208. * @private
  209. */
  210. static parseStyle_(text, styles) {
  211. // Skip empty blocks.
  212. if (text.length == 1 && !text[0]) {
  213. return;
  214. }
  215. // Skip comment blocks.
  216. if (/^NOTE($|[ \t])/.test(text[0])) {
  217. return;
  218. }
  219. // Only style block are allowed.
  220. if (text[0] != 'STYLE') {
  221. return;
  222. }
  223. /** @type {!Array.<!Array.<string>>} */
  224. const styleBlocks = [];
  225. let lastBlockIndex = -1;
  226. for (let i = 1; i < text.length; i++) {
  227. if (text[i].includes('::cue')) {
  228. styleBlocks.push([]);
  229. lastBlockIndex = styleBlocks.length - 1;
  230. }
  231. if (lastBlockIndex == -1) {
  232. continue;
  233. }
  234. styleBlocks[lastBlockIndex].push(text[i]);
  235. if (text[i].includes('}')) {
  236. lastBlockIndex = -1;
  237. }
  238. }
  239. for (const styleBlock of styleBlocks) {
  240. let styleSelector = 'global';
  241. // Look for what is within parentheses. For example:
  242. // <code>:: cue (b) {</code>, what we are looking for is <code>b</code>
  243. const selector = styleBlock[0].match(/\((.*)\)/);
  244. if (selector) {
  245. styleSelector = selector.pop();
  246. }
  247. // We start at 1 to avoid '::cue' and end earlier to avoid '}'
  248. let propertyLines = styleBlock.slice(1, -1);
  249. if (styleBlock[0].includes('}')) {
  250. const payload = /\{(.*?)\}/.exec(styleBlock[0]);
  251. if (payload) {
  252. propertyLines = payload[1].split(';');
  253. }
  254. }
  255. // Continue styles over multiple selectors if necessary.
  256. // For example,
  257. // ::cue(b) { background: white; } ::cue(b) { color: blue; }
  258. // should set both the background and foreground of bold tags.
  259. let cue = styles.get(styleSelector);
  260. if (!cue) {
  261. cue = new shaka.text.Cue(0, 0, '');
  262. }
  263. let validStyle = false;
  264. for (let i = 0; i < propertyLines.length; i++) {
  265. // We look for CSS properties. As a general rule they are separated by
  266. // <code>:</code>. Eg: <code>color: red;</code>
  267. const lineParts = /^\s*([^:]+):\s*(.*)/.exec(propertyLines[i]);
  268. if (lineParts) {
  269. const name = lineParts[1].trim();
  270. const value = lineParts[2].trim().replace(';', '');
  271. switch (name) {
  272. case 'background-color':
  273. case 'background':
  274. validStyle = true;
  275. cue.backgroundColor = value;
  276. break;
  277. case 'color':
  278. validStyle = true;
  279. cue.color = value;
  280. break;
  281. case 'font-family':
  282. validStyle = true;
  283. cue.fontFamily = value;
  284. break;
  285. case 'font-size':
  286. validStyle = true;
  287. cue.fontSize = value;
  288. break;
  289. case 'font-weight':
  290. if (parseInt(value, 10) >= 700 || value == 'bold') {
  291. validStyle = true;
  292. cue.fontWeight = shaka.text.Cue.fontWeight.BOLD;
  293. }
  294. break;
  295. case 'font-style':
  296. switch (value) {
  297. case 'normal':
  298. validStyle = true;
  299. cue.fontStyle = shaka.text.Cue.fontStyle.NORMAL;
  300. break;
  301. case 'italic':
  302. validStyle = true;
  303. cue.fontStyle = shaka.text.Cue.fontStyle.ITALIC;
  304. break;
  305. case 'oblique':
  306. validStyle = true;
  307. cue.fontStyle = shaka.text.Cue.fontStyle.OBLIQUE;
  308. break;
  309. }
  310. break;
  311. case 'opacity':
  312. validStyle = true;
  313. cue.opacity = parseFloat(value);
  314. break;
  315. case 'text-combine-upright':
  316. validStyle = true;
  317. cue.textCombineUpright = value;
  318. break;
  319. case 'text-shadow':
  320. validStyle = true;
  321. cue.textShadow = value;
  322. break;
  323. case 'white-space':
  324. validStyle = true;
  325. cue.wrapLine = value != 'noWrap';
  326. break;
  327. default:
  328. shaka.log.warning('VTT parser encountered an unsupported style: ',
  329. lineParts);
  330. break;
  331. }
  332. }
  333. }
  334. if (validStyle) {
  335. styles.set(styleSelector, cue);
  336. }
  337. }
  338. }
  339. /**
  340. * Parses a text block into a Cue object.
  341. *
  342. * @param {!Array.<string>} text
  343. * @param {number} timeOffset
  344. * @param {!Array.<!shaka.text.CueRegion>} regions
  345. * @param {!Map.<string, shaka.text.Cue>} styles
  346. * @return {shaka.text.Cue}
  347. * @private
  348. */
  349. static parseCue_(text, timeOffset, regions, styles) {
  350. const VttTextParser = shaka.text.VttTextParser;
  351. // Skip empty blocks.
  352. if (text.length == 1 && !text[0]) {
  353. return null;
  354. }
  355. // Skip comment blocks.
  356. if (/^NOTE($|[ \t])/.test(text[0])) {
  357. return null;
  358. }
  359. // Skip style and region blocks.
  360. if (text[0] == 'STYLE' || text[0] == 'REGION') {
  361. return null;
  362. }
  363. let id = null;
  364. if (!text[0].includes('-->')) {
  365. id = text[0];
  366. text.splice(0, 1);
  367. }
  368. // Parse the times.
  369. const parser = new shaka.util.TextParser(text[0]);
  370. let start = VttTextParser.parseTime_(parser);
  371. const expect = parser.readRegex(/[ \t]+-->[ \t]+/g);
  372. let end = VttTextParser.parseTime_(parser);
  373. if (start == null || expect == null || end == null) {
  374. shaka.log.alwaysWarn(
  375. 'Failed to parse VTT time code. Cue skipped:', id, text);
  376. return null;
  377. }
  378. start += timeOffset;
  379. end += timeOffset;
  380. // Get the payload.
  381. const payload = text.slice(1).join('\n').trim();
  382. let cue = null;
  383. if (styles.has('global')) {
  384. cue = styles.get('global').clone();
  385. cue.startTime = start;
  386. cue.endTime = end;
  387. cue.payload = '';
  388. } else {
  389. cue = new shaka.text.Cue(start, end, '');
  390. }
  391. // Parse optional settings.
  392. parser.skipWhitespace();
  393. let word = parser.readWord();
  394. while (word) {
  395. if (!VttTextParser.parseCueSetting(cue, word, regions)) {
  396. shaka.log.warning('VTT parser encountered an invalid VTT setting: ',
  397. word,
  398. ' The setting will be ignored.');
  399. }
  400. parser.skipWhitespace();
  401. word = parser.readWord();
  402. }
  403. VttTextParser.parseCueStyles(payload, cue, styles);
  404. if (id != null) {
  405. cue.id = id;
  406. }
  407. return cue;
  408. }
  409. /**
  410. * Parses a WebVTT styles from the given payload.
  411. *
  412. * @param {string} payload
  413. * @param {!shaka.text.Cue} rootCue
  414. * @param {!Map.<string, shaka.text.Cue>} styles
  415. */
  416. static parseCueStyles(payload, rootCue, styles) {
  417. const VttTextParser = shaka.text.VttTextParser;
  418. const StringUtils = shaka.util.StringUtils;
  419. const TXml = shaka.util.TXml;
  420. // Optimization for unstyled payloads.
  421. if (!payload.includes('<')) {
  422. rootCue.payload = StringUtils.htmlUnescape(payload);
  423. return;
  424. }
  425. if (styles.size === 0) {
  426. VttTextParser.addDefaultTextColor_(styles);
  427. }
  428. payload = VttTextParser.replaceKaraokeStylePayload_(payload);
  429. payload = VttTextParser.replaceVoiceStylePayload_(payload);
  430. payload = VttTextParser.escapeInvalidChevrons_(payload);
  431. const xmlPayload = '<span>' + payload + '</span>';
  432. let element;
  433. try {
  434. element = TXml.parseXmlString(xmlPayload, 'span');
  435. } catch (e) {
  436. shaka.log.warning('cue parse fail: ', e);
  437. }
  438. if (element) {
  439. const childNodes = element.children;
  440. if (childNodes.length == 1) {
  441. const childNode = childNodes[0];
  442. if (!TXml.isNode(childNode)) {
  443. rootCue.payload = StringUtils.htmlUnescape(payload);
  444. return;
  445. }
  446. }
  447. for (const childNode of childNodes) {
  448. VttTextParser.generateCueFromElement_(childNode, rootCue, styles);
  449. }
  450. } else {
  451. shaka.log.warning('The cue\'s markup could not be parsed: ', payload);
  452. rootCue.payload = StringUtils.htmlUnescape(payload);
  453. }
  454. }
  455. /**
  456. * This method converts invalid > chevrons to HTML entities.
  457. * It also removes < chevrons as per spec.
  458. *
  459. * @param {!string} input
  460. * @return {string}
  461. * @private
  462. */
  463. static escapeInvalidChevrons_(input) {
  464. // Used to map HTML entities to characters.
  465. const htmlEscapes = {
  466. '< ': '',
  467. ' >': ' &gt;',
  468. };
  469. const reEscapedHtml = /(< +>|<\s|\s>)/g;
  470. const reHasEscapedHtml = RegExp(reEscapedHtml.source);
  471. // This check is an optimization, since replace always makes a copy
  472. if (input && reHasEscapedHtml.test(input)) {
  473. return input.replace(reEscapedHtml, (entity) => {
  474. return htmlEscapes[entity] || '';
  475. });
  476. }
  477. return input || '';
  478. }
  479. /**
  480. * Converts voice style tag to be valid for xml parsing
  481. * For example,
  482. * input: <v Shaka>Test
  483. * output: <v.voice-Shaka>Test</v.voice-Shaka>
  484. *
  485. * @param {string} payload
  486. * @return {string} processed payload
  487. * @private
  488. */
  489. static replaceVoiceStylePayload_(payload) {
  490. const voiceTag = 'v';
  491. const names = [];
  492. let nameStart = -1;
  493. let newPayload = '';
  494. let hasVoiceEndTag = false;
  495. for (let i = 0; i < payload.length; i++) {
  496. // This condition is used to manage tags that have end tags.
  497. if (payload[i] === '/') {
  498. const end = payload.indexOf('>', i);
  499. if (end === -1) {
  500. return payload;
  501. }
  502. const tagEnd = payload.substring(i + 1, end);
  503. if (!tagEnd || tagEnd != voiceTag) {
  504. newPayload += payload[i];
  505. continue;
  506. }
  507. hasVoiceEndTag = true;
  508. let tagStart = null;
  509. if (names.length) {
  510. tagStart = names[names.length -1];
  511. }
  512. if (!tagStart) {
  513. newPayload += payload[i];
  514. } else if (tagStart === tagEnd) {
  515. newPayload += '/' + tagEnd + '>';
  516. i += tagEnd.length + 1;
  517. } else {
  518. if (!tagStart.startsWith(voiceTag)) {
  519. newPayload += payload[i];
  520. continue;
  521. }
  522. newPayload += '/' + tagStart + '>';
  523. i += tagEnd.length + 1;
  524. }
  525. } else {
  526. // Here we only want the tag name, not any other payload.
  527. if (payload[i] === '<') {
  528. nameStart = i + 1;
  529. if (payload[nameStart] != voiceTag) {
  530. nameStart = -1;
  531. }
  532. } else if (payload[i] === '>') {
  533. if (nameStart > 0) {
  534. names.push(payload.substr(nameStart, i - nameStart));
  535. nameStart = -1;
  536. }
  537. }
  538. newPayload += payload[i];
  539. }
  540. }
  541. for (const name of names) {
  542. const newName = name.replace(' ', '.voice-');
  543. newPayload = newPayload.replace(`<${name}>`, `<${newName}>`);
  544. newPayload = newPayload.replace(`</${name}>`, `</${newName}>`);
  545. if (!hasVoiceEndTag) {
  546. newPayload += `</${newName}>`;
  547. }
  548. }
  549. return newPayload;
  550. }
  551. /**
  552. * Converts karaoke style tag to be valid for xml parsing
  553. * For example,
  554. * input: Text <00:00:00.450> time <00:00:01.450> 1
  555. * output: Text <div time="00:00:00.450"> time
  556. * <div time="00:00:01.450"> 1</div></div>
  557. *
  558. * @param {string} payload
  559. * @return {string} processed payload
  560. * @private
  561. */
  562. static replaceKaraokeStylePayload_(payload) {
  563. const names = [];
  564. let nameStart = -1;
  565. for (let i = 0; i < payload.length; i++) {
  566. if (payload[i] === '<') {
  567. nameStart = i + 1;
  568. } else if (payload[i] === '>') {
  569. if (nameStart > 0) {
  570. const name = payload.substr(nameStart, i - nameStart);
  571. if (name.match(shaka.text.VttTextParser.timeFormat_)) {
  572. names.push(name);
  573. }
  574. nameStart = -1;
  575. }
  576. }
  577. }
  578. let newPayload = payload;
  579. for (const name of names) {
  580. const replaceTag = '<' + name + '>';
  581. const startTag = '<div time="' + name + '">';
  582. const endTag = '</div>';
  583. newPayload = newPayload.replace(replaceTag, startTag);
  584. newPayload += endTag;
  585. }
  586. return newPayload;
  587. }
  588. /**
  589. * @param {string} value
  590. * @param {string} defaultValue
  591. * @private
  592. */
  593. static getOrDefault_(value, defaultValue) {
  594. if (value && value.length > 0) {
  595. return value;
  596. }
  597. return defaultValue;
  598. }
  599. /**
  600. * Merges values created in parseStyle_
  601. * @param {!shaka.text.Cue} cue
  602. * @param {shaka.text.Cue} refCue
  603. * @private
  604. */
  605. static mergeStyle_(cue, refCue) {
  606. if (!refCue) {
  607. return;
  608. }
  609. const VttTextParser = shaka.text.VttTextParser;
  610. // Overwrites if new value string length > 0
  611. cue.backgroundColor = VttTextParser.getOrDefault_(
  612. refCue.backgroundColor, cue.backgroundColor);
  613. cue.color = VttTextParser.getOrDefault_(
  614. refCue.color, cue.color);
  615. cue.fontFamily = VttTextParser.getOrDefault_(
  616. refCue.fontFamily, cue.fontFamily);
  617. cue.fontSize = VttTextParser.getOrDefault_(
  618. refCue.fontSize, cue.fontSize);
  619. cue.textShadow = VttTextParser.getOrDefault_(
  620. refCue.textShadow, cue.textShadow);
  621. // Overwrite with new values as unable to determine
  622. // if new value is set or not
  623. cue.fontWeight = refCue.fontWeight;
  624. cue.fontStyle = refCue.fontStyle;
  625. cue.opacity = refCue.opacity;
  626. cue.rubyTag = refCue.rubyTag;
  627. cue.textCombineUpright = refCue.textCombineUpright;
  628. cue.wrapLine = refCue.wrapLine;
  629. }
  630. /**
  631. * @param {!shaka.extern.xml.Node} element
  632. * @param {!shaka.text.Cue} rootCue
  633. * @param {!Map.<string, shaka.text.Cue>} styles
  634. * @private
  635. */
  636. static generateCueFromElement_(element, rootCue, styles) {
  637. const VttTextParser = shaka.text.VttTextParser;
  638. const TXml = shaka.util.TXml;
  639. const nestedCue = rootCue.clone();
  640. // We don't want propagate some properties.
  641. nestedCue.nestedCues = [];
  642. nestedCue.payload = '';
  643. nestedCue.rubyTag = '';
  644. // We don't want propagate some position settings
  645. nestedCue.line = null;
  646. nestedCue.region = new shaka.text.CueRegion();
  647. nestedCue.position = null;
  648. nestedCue.size = 0;
  649. if (shaka.util.TXml.isNode(element)) {
  650. const bold = shaka.text.Cue.fontWeight.BOLD;
  651. const italic = shaka.text.Cue.fontStyle.ITALIC;
  652. const underline = shaka.text.Cue.textDecoration.UNDERLINE;
  653. const tags = element.tagName.split(/(?=[ .])+/g);
  654. for (const tag of tags) {
  655. let styleTag = tag;
  656. // White blanks at start indicate that the style is a voice
  657. if (styleTag.startsWith('.voice-')) {
  658. const voice = styleTag.split('-').pop();
  659. styleTag = `v[voice="${voice}"]`;
  660. // The specification allows to have quotes and not, so we check to
  661. // see which one is being used.
  662. if (!styles.has(styleTag)) {
  663. styleTag = `v[voice=${voice}]`;
  664. }
  665. }
  666. if (styles.has(styleTag)) {
  667. VttTextParser.mergeStyle_(nestedCue, styles.get(styleTag));
  668. }
  669. switch (tag) {
  670. case 'br': {
  671. const lineBreakCue = shaka.text.Cue.lineBreak(
  672. nestedCue.startTime, nestedCue.endTime);
  673. rootCue.nestedCues.push(lineBreakCue);
  674. return;
  675. }
  676. case 'b':
  677. nestedCue.fontWeight = bold;
  678. break;
  679. case 'i':
  680. nestedCue.fontStyle = italic;
  681. break;
  682. case 'u':
  683. nestedCue.textDecoration.push(underline);
  684. break;
  685. case 'font': {
  686. const color = element.attributes['color'];
  687. if (color) {
  688. nestedCue.color = color;
  689. }
  690. break;
  691. }
  692. case 'div': {
  693. const time = element.attributes['time'];
  694. if (!time) {
  695. break;
  696. }
  697. const parser = new shaka.util.TextParser(time);
  698. const cueTime = shaka.text.VttTextParser.parseTime_(parser);
  699. if (cueTime) {
  700. nestedCue.startTime = cueTime;
  701. }
  702. break;
  703. }
  704. case 'ruby':
  705. case 'rp':
  706. case 'rt':
  707. nestedCue.rubyTag = tag;
  708. break;
  709. default:
  710. break;
  711. }
  712. }
  713. }
  714. const isTextNode = (item) => shaka.util.TXml.isText(item);
  715. const childNodes = element.children;
  716. if (isTextNode(element) ||
  717. (childNodes.length == 1 && isTextNode(childNodes[0]))) {
  718. // Trailing line breaks may lost when convert cue to HTML tag
  719. // Need to insert line break cue to preserve line breaks
  720. const textArr = TXml.getTextContents(element).split('\n');
  721. let isFirst = true;
  722. for (const text of textArr) {
  723. if (!isFirst) {
  724. const lineBreakCue = shaka.text.Cue.lineBreak(
  725. nestedCue.startTime, nestedCue.endTime);
  726. rootCue.nestedCues.push(lineBreakCue);
  727. }
  728. if (text.length > 0) {
  729. const textCue = nestedCue.clone();
  730. textCue.payload = shaka.util.StringUtils.htmlUnescape(text);
  731. rootCue.nestedCues.push(textCue);
  732. }
  733. isFirst = false;
  734. }
  735. } else {
  736. rootCue.nestedCues.push(nestedCue);
  737. for (const childNode of childNodes) {
  738. VttTextParser.generateCueFromElement_(childNode, nestedCue, styles);
  739. }
  740. }
  741. }
  742. /**
  743. * Parses a WebVTT setting from the given word.
  744. *
  745. * @param {!shaka.text.Cue} cue
  746. * @param {string} word
  747. * @param {!Array.<!shaka.text.CueRegion>} regions
  748. * @return {boolean} True on success.
  749. */
  750. static parseCueSetting(cue, word, regions) {
  751. const VttTextParser = shaka.text.VttTextParser;
  752. let results = null;
  753. if ((results = /^align:(start|middle|center|end|left|right)$/.exec(word))) {
  754. VttTextParser.setTextAlign_(cue, results[1]);
  755. } else if ((results = /^vertical:(lr|rl)$/.exec(word))) {
  756. VttTextParser.setVerticalWritingMode_(cue, results[1]);
  757. } else if ((results = /^size:([\d.]+)%$/.exec(word))) {
  758. cue.size = Number(results[1]);
  759. } else if ((results =
  760. // eslint-disable-next-line max-len
  761. /^position:([\d.]+)%(?:,(line-left|line-right|middle|center|start|end|auto))?$/
  762. .exec(word))) {
  763. cue.position = Number(results[1]);
  764. if (results[2]) {
  765. VttTextParser.setPositionAlign_(cue, results[2]);
  766. }
  767. } else if ((results = /^region:(.*)$/.exec(word))) {
  768. const region = VttTextParser.getRegionById_(regions, results[1]);
  769. if (region) {
  770. cue.region = region;
  771. }
  772. } else {
  773. return VttTextParser.parsedLineValueAndInterpretation_(cue, word);
  774. }
  775. return true;
  776. }
  777. /**
  778. *
  779. * @param {!Array.<!shaka.text.CueRegion>} regions
  780. * @param {string} id
  781. * @return {?shaka.text.CueRegion}
  782. * @private
  783. */
  784. static getRegionById_(regions, id) {
  785. const regionsWithId = regions.filter((region) => {
  786. return region.id == id;
  787. });
  788. if (!regionsWithId.length) {
  789. shaka.log.warning('VTT parser could not find a region with id: ',
  790. id,
  791. ' The region will be ignored.');
  792. return null;
  793. }
  794. goog.asserts.assert(regionsWithId.length == 1,
  795. 'VTTRegion ids should be unique!');
  796. return regionsWithId[0];
  797. }
  798. /**
  799. * Parses a WebVTTRegion setting from the given word.
  800. *
  801. * @param {!shaka.text.CueRegion} region
  802. * @param {string} word
  803. * @return {boolean} True on success.
  804. * @private
  805. */
  806. static parseRegionSetting_(region, word) {
  807. let results = null;
  808. if ((results = /^id=(.*)$/.exec(word))) {
  809. region.id = results[1];
  810. } else if ((results = /^width=(\d{1,2}|100)%$/.exec(word))) {
  811. region.width = Number(results[1]);
  812. } else if ((results = /^lines=(\d+)$/.exec(word))) {
  813. region.height = Number(results[1]);
  814. region.heightUnits = shaka.text.CueRegion.units.LINES;
  815. } else if ((results = /^regionanchor=(\d{1,2}|100)%,(\d{1,2}|100)%$/
  816. .exec(word))) {
  817. region.regionAnchorX = Number(results[1]);
  818. region.regionAnchorY = Number(results[2]);
  819. } else if ((results = /^viewportanchor=(\d{1,2}|100)%,(\d{1,2}|100)%$/
  820. .exec(word))) {
  821. region.viewportAnchorX = Number(results[1]);
  822. region.viewportAnchorY = Number(results[2]);
  823. } else if ((results = /^scroll=up$/.exec(word))) {
  824. region.scroll = shaka.text.CueRegion.scrollMode.UP;
  825. } else {
  826. return false;
  827. }
  828. return true;
  829. }
  830. /**
  831. * @param {!shaka.text.Cue} cue
  832. * @param {string} align
  833. * @private
  834. */
  835. static setTextAlign_(cue, align) {
  836. const Cue = shaka.text.Cue;
  837. if (align == 'middle') {
  838. cue.textAlign = Cue.textAlign.CENTER;
  839. } else {
  840. goog.asserts.assert(align.toUpperCase() in Cue.textAlign,
  841. align.toUpperCase() +
  842. ' Should be in Cue.textAlign values!');
  843. cue.textAlign = Cue.textAlign[align.toUpperCase()];
  844. }
  845. }
  846. /**
  847. * @param {!shaka.text.Cue} cue
  848. * @param {string} align
  849. * @private
  850. */
  851. static setPositionAlign_(cue, align) {
  852. const Cue = shaka.text.Cue;
  853. if (align == 'line-left' || align == 'start') {
  854. cue.positionAlign = Cue.positionAlign.LEFT;
  855. } else if (align == 'line-right' || align == 'end') {
  856. cue.positionAlign = Cue.positionAlign.RIGHT;
  857. } else if (align == 'center' || align == 'middle') {
  858. cue.positionAlign = Cue.positionAlign.CENTER;
  859. } else {
  860. cue.positionAlign = Cue.positionAlign.AUTO;
  861. }
  862. }
  863. /**
  864. * @param {!shaka.text.Cue} cue
  865. * @param {string} value
  866. * @private
  867. */
  868. static setVerticalWritingMode_(cue, value) {
  869. const Cue = shaka.text.Cue;
  870. if (value == 'lr') {
  871. cue.writingMode = Cue.writingMode.VERTICAL_LEFT_TO_RIGHT;
  872. } else {
  873. cue.writingMode = Cue.writingMode.VERTICAL_RIGHT_TO_LEFT;
  874. }
  875. }
  876. /**
  877. * @param {!shaka.text.Cue} cue
  878. * @param {string} word
  879. * @return {boolean}
  880. * @private
  881. */
  882. static parsedLineValueAndInterpretation_(cue, word) {
  883. const Cue = shaka.text.Cue;
  884. let results = null;
  885. if ((results = /^line:([\d.]+)%(?:,(start|end|center))?$/.exec(word))) {
  886. cue.lineInterpretation = Cue.lineInterpretation.PERCENTAGE;
  887. cue.line = Number(results[1]);
  888. if (results[2]) {
  889. goog.asserts.assert(
  890. results[2].toUpperCase() in Cue.lineAlign,
  891. results[2].toUpperCase() + ' Should be in Cue.lineAlign values!');
  892. cue.lineAlign = Cue.lineAlign[results[2].toUpperCase()];
  893. }
  894. } else if ((results =
  895. /^line:(-?\d+)(?:,(start|end|center))?$/.exec(word))) {
  896. cue.lineInterpretation = Cue.lineInterpretation.LINE_NUMBER;
  897. cue.line = Number(results[1]);
  898. if (results[2]) {
  899. goog.asserts.assert(
  900. results[2].toUpperCase() in Cue.lineAlign,
  901. results[2].toUpperCase() + ' Should be in Cue.lineAlign values!');
  902. cue.lineAlign = Cue.lineAlign[results[2].toUpperCase()];
  903. }
  904. } else {
  905. return false;
  906. }
  907. return true;
  908. }
  909. /**
  910. * Parses a WebVTT time from the given parser.
  911. *
  912. * @param {!shaka.util.TextParser} parser
  913. * @return {?number}
  914. * @private
  915. */
  916. static parseTime_(parser) {
  917. const results = parser.readRegex(shaka.text.VttTextParser.timeFormat_);
  918. if (results == null) {
  919. return null;
  920. }
  921. // This capture is optional, but will still be in the array as undefined,
  922. // in which case it is 0.
  923. const hours = Number(results[1]) || 0;
  924. const minutes = Number(results[2]);
  925. const seconds = Number(results[3]);
  926. const milliseconds = Number(results[4]);
  927. if (minutes > 59 || seconds > 59) {
  928. return null;
  929. }
  930. return (milliseconds / 1000) + seconds + (minutes * 60) + (hours * 3600);
  931. }
  932. };
  933. /**
  934. * @const {number}
  935. * @private
  936. */
  937. shaka.text.VttTextParser.MPEG_TIMESCALE_ = 90000;
  938. /**
  939. * At this value, timestamps roll over in TS content.
  940. * @const {number}
  941. * @private
  942. */
  943. shaka.text.VttTextParser.TS_ROLLOVER_ = 0x200000000;
  944. /**
  945. * @const
  946. * @private {!RegExp}
  947. * @example 00:00.000 or 00:00:00.000 or 0:00:00.000 or
  948. * 00:00.00 or 00:00:00.00 or 0:00:00.00
  949. */
  950. shaka.text.VttTextParser.timeFormat_ =
  951. /(?:(\d{1,}):)?(\d{2}):(\d{2})\.(\d{2,3})/g;
  952. shaka.text.TextEngine.registerParser(
  953. 'text/vtt', () => new shaka.text.VttTextParser());
  954. shaka.text.TextEngine.registerParser(
  955. 'text/vtt; codecs="vtt"', () => new shaka.text.VttTextParser());
  956. shaka.text.TextEngine.registerParser(
  957. 'text/vtt; codecs="wvtt"', () => new shaka.text.VttTextParser());