Skip to content

Commit

Permalink
feat(toDash): Add option to include WebVTT or TTML captions (#673)
Browse files Browse the repository at this point in the history
  • Loading branch information
absidue authored Jun 25, 2024
1 parent e5aab9a commit bd9f6ac
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 11 deletions.
6 changes: 6 additions & 0 deletions src/core/mixins/MediaInfo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,16 @@ export default class MediaInfo {
}

let storyboards;
let captions;

if (options.include_thumbnails && player_response.storyboards) {
storyboards = player_response.storyboards;
}

if (typeof options.captions_format === 'string' && player_response.captions?.caption_tracks) {
captions = player_response.captions.caption_tracks;
}

return FormatUtils.toDash(
this.streaming_data,
this.page[0].video_details?.is_post_live_dvr,
Expand All @@ -68,6 +73,7 @@ export default class MediaInfo {
this.#actions.session.player,
this.#actions,
storyboards,
captions,
options
);
}
Expand Down
18 changes: 10 additions & 8 deletions src/parser/classes/PlayerCaptionsTracklist.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,19 @@ import Text from './misc/Text.js';
import { YTNode } from '../helpers.js';
import type { RawNode } from '../index.js';

/**
 * Shape of a single entry in `PlayerCaptionsTracklist.caption_tracks`.
 *
 * These entries are what `getTextSets` consumes to build the text
 * adaptation sets of the DASH manifest.
 */
export interface CaptionTrackData {
/** URL of the caption track; `getTextSets` parses it with `new URL()` and sets its `fmt` query parameter. */
base_url: string;
/** Name of the track; `getTextSets` stringifies it to produce the track's label. */
name: Text;
/** Identifier of the track; `getTextSets` uses it to build the representation id (`text-<vss_id>`). NOTE(review): uniqueness per video is assumed, confirm. */
vss_id: string;
/** Language code of the track; emitted as the `lang` attribute of the adaptation set. */
language_code: string;
/** Optional track kind. NOTE(review): `asr` presumably marks automatically generated (speech recognition) captions; the meaning of `frc` is not visible here, confirm against the parser. */
kind?: 'asr' | 'frc';
/** NOTE(review): presumably whether the track can be auto-translated; not used by the code shown here. */
is_translatable: boolean;
}

export default class PlayerCaptionsTracklist extends YTNode {
static type = 'PlayerCaptionsTracklist';

caption_tracks?: {
base_url: string;
name: Text;
vss_id: string;
language_code: string;
kind?: 'asr' | 'frc';
is_translatable: boolean;
}[];
caption_tracks?: CaptionTrackData[];

audio_tracks?: {
audio_track_id: string;
Expand Down
9 changes: 9 additions & 0 deletions src/types/StreamingInfoOptions.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
export interface StreamingInfoOptions {
/**
* The format to use for the captions, when the video has captions.
* If this option is not set, the DASH manifest will not include the captions.
*
* Possible values:
* * `vtt`: Tells YouTube to return the captions in the WebVTT format
* * `ttml`: Tells YouTube to return the captions in the TTML format
*/
captions_format?: 'vtt' | 'ttml';
/**
* The label to use for the non-DRC streams when a video has both DRC and non-DRC streams.
*
Expand Down
36 changes: 34 additions & 2 deletions src/utils/DashManifest.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import type { SegmentInfo as FSegmentInfo } from './StreamingInfo.js';
import type { FormatFilter, URLTransformer } from '../types/FormatUtils.js';
import type PlayerLiveStoryboardSpec from '../parser/classes/PlayerLiveStoryboardSpec.js';
import type { StreamingInfoOptions } from '../types/StreamingInfoOptions.js';
import type { CaptionTrackData } from '../parser/classes/PlayerCaptionsTracklist.js';

interface DashManifestProps {
streamingData: IStreamingData;
Expand All @@ -24,6 +25,7 @@ interface DashManifestProps {
player?: Player;
actions?: Actions;
storyboards?: PlayerStoryboardSpec | PlayerLiveStoryboardSpec;
captionTracks?: CaptionTrackData[];
}

async function OTFPostLiveDvrSegmentInfo({ info }: { info: FSegmentInfo }) {
Expand Down Expand Up @@ -73,14 +75,16 @@ async function DashManifest({
player,
actions,
storyboards,
captionTracks,
options
}: DashManifestProps) {
const {
getDuration,
audio_sets,
video_sets,
image_sets
} = getStreamingInfo(streamingData, isPostLiveDvr, transformURL, rejectFormat, cpn, player, actions, storyboards, options);
image_sets,
text_sets
} = getStreamingInfo(streamingData, isPostLiveDvr, transformURL, rejectFormat, cpn, player, actions, storyboards, captionTracks, options);

// XXX: DASH spec: https://standards.iso.org/ittf/PubliclyAvailableStandards/c083314_ISO_IEC%2023009-1_2022(en).zip

Expand Down Expand Up @@ -229,6 +233,32 @@ async function DashManifest({
</adaptation-set>;
})
}
{
text_sets.map((set, index) => {
return <adaptation-set
id={index + audio_sets.length + video_sets.length + image_sets.length}
mimeType={set.mime_type}
lang={set.language}
contentType="text"
>
<role
schemeIdUri="urn:mpeg:dash:role:2011"
value="caption"
/>
<label id={index + audio_sets.length}>
{set.track_name}
</label>
<representation
id={set.representation.uid}
bandwidth="0"
>
<base-url>
{set.representation.base_url}
</base-url>
</representation>
</adaptation-set>;
})
}
</period>
</mpd>;
}
Expand All @@ -242,6 +272,7 @@ export function toDash(
player?: Player,
actions?: Actions,
storyboards?: PlayerStoryboardSpec | PlayerLiveStoryboardSpec,
caption_tracks?: CaptionTrackData[],
options?: StreamingInfoOptions
) {
if (!streaming_data)
Expand All @@ -258,6 +289,7 @@ export function toDash(
player={player}
actions={actions}
storyboards={storyboards}
captionTracks={caption_tracks}
/>
);
}
50 changes: 49 additions & 1 deletion src/utils/StreamingInfo.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import type { Format } from '../parser/misc.js';
import type { PlayerLiveStoryboardSpec } from '../parser/nodes.js';
import type { FormatFilter, URLTransformer } from '../types/FormatUtils.js';
import type { StreamingInfoOptions } from '../types/StreamingInfoOptions.js';
import type { CaptionTrackData } from '../parser/classes/PlayerCaptionsTracklist.js';

const TAG_ = 'StreamingInfo';

Expand All @@ -20,6 +21,7 @@ export interface StreamingInfo {
audio_sets: AudioSet[];
video_sets: VideoSet[];
image_sets: ImageSet[];
text_sets: TextSet[];
}

export interface AudioSet {
Expand Down Expand Up @@ -122,6 +124,18 @@ export interface ImageRepresentation {
getURL(n: number): string;
}

/**
 * A caption track prepared for the DASH manifest; `getTextSets` creates one
 * per caption track and the manifest renders each as a text adaptation set.
 */
export interface TextSet {
/** MIME type of the captions: `text/vtt` or `application/ttml+xml`, depending on the requested format. */
mime_type: string;
/** Language of the captions, copied from the caption track's `language_code`. */
language: string;
/** Label of the track, the stringified name of the caption track. */
track_name: string;
/** The single representation (id and URL) belonging to this set. */
representation: TextRepresentation;
}

/**
 * The representation of a `TextSet`: where the captions can be downloaded
 * and under which id they appear in the manifest.
 */
export interface TextRepresentation {
/** Representation id; `getTextSets` builds it as `text-<vss_id>`. */
uid: string;
/** Caption URL with the `fmt` query parameter set, after being passed through the URL transformer. */
base_url: string;
}

interface PostLiveDvrInfo {
duration: number,
segment_count: number
Expand Down Expand Up @@ -735,6 +749,29 @@ function getImageSets(
}));
}

/**
 * Converts caption tracks into the text sets used for the DASH manifest.
 *
 * Each track's URL gets its `fmt` query parameter set to the requested
 * format (replacing any existing value) and is then passed through
 * `transform_url` before being stored as a string.
 *
 * @param caption_tracks - The caption tracks to convert.
 * @param format - The captions format to request: `vtt` or `ttml`.
 * @param transform_url - Callback applied to every caption URL.
 * @returns One text set per caption track, in the same order.
 */
function getTextSets(
  caption_tracks: CaptionTrackData[],
  format: 'vtt' | 'ttml',
  transform_url: URLTransformer
): TextSet[] {
  // Anything that is not `vtt` is treated as TTML; callers validate the format beforehand.
  let mime_type: string;
  if (format === 'vtt') {
    mime_type = 'text/vtt';
  } else {
    mime_type = 'application/ttml+xml';
  }

  return caption_tracks.map((track): TextSet => {
    const caption_url = new URL(track.base_url);
    caption_url.searchParams.set('fmt', format);

    const language = track.language_code;
    const track_name = track.name.toString();
    const uid = `text-${track.vss_id}`;
    const base_url = transform_url(caption_url).toString();

    return {
      mime_type,
      language,
      track_name,
      representation: { uid, base_url }
    };
  });
}

export function getStreamingInfo(
streaming_data?: IStreamingData,
is_post_live_dvr = false,
Expand All @@ -744,6 +781,7 @@ export function getStreamingInfo(
player?: Player,
actions?: Actions,
storyboards?: PlayerStoryboardSpec | PlayerLiveStoryboardSpec,
caption_tracks?: CaptionTrackData[],
options?: StreamingInfoOptions
) {
if (!streaming_data)
Expand Down Expand Up @@ -839,11 +877,21 @@ export function getStreamingInfo(
image_sets = getImageSets(duration, actions, storyboards, url_transformer);
}

let text_sets: TextSet[] = [];

if (caption_tracks && options?.captions_format) {
if ((options.captions_format as string) !== 'vtt' && (options.captions_format as string) !== 'ttml') {
throw new InnertubeError('Invalid captions format', options.captions_format);
}
text_sets = getTextSets(caption_tracks, options.captions_format, url_transformer);
}

const info : StreamingInfo = {
getDuration,
audio_sets,
video_sets,
image_sets
image_sets,
text_sets
};

return info;
Expand Down

0 comments on commit bd9f6ac

Please sign in to comment.