Skip to content

Commit

Permalink
Subtitle extraction from streaming media manifests #247
Browse files Browse the repository at this point in the history
Authored by fstirlitz
Modified from: ytdl-org/youtube-dl#6144

Closes: #73
Fixes:
ytdl-org/youtube-dl#6106
ytdl-org/youtube-dl#14977
ytdl-org/youtube-dl#21438
ytdl-org/youtube-dl#23609
ytdl-org/youtube-dl#28132

Might also fix (untested):
ytdl-org/youtube-dl#15424
ytdl-org/youtube-dl#18267
ytdl-org/youtube-dl#23899
ytdl-org/youtube-dl#24375
ytdl-org/youtube-dl#24595
ytdl-org/youtube-dl#27899

Related:
ytdl-org/youtube-dl#22379
ytdl-org/youtube-dl#24517
ytdl-org/youtube-dl#24886
ytdl-org/youtube-dl#27215

Notes:
* The functions `extractor.common._extract_..._formats` are still kept for compatibility
* Only some extractors have currently been moved to using `_extract_..._formats_and_subtitles`
* Direct subtitle manifests (without a master) are not supported and are wrongly identified as containing video formats
* AES support is untested
* The fragmented TTML subtitles extracted from DASH/ISM are valid, but are unsupported by `ffmpeg` and most video players
    * Their XML fragments can be dumped using `ffmpeg -i in.mp4 -f data -map 0 -c copy out.ttml`.
        Once the unnecessary headers are stripped out of this, it becomes a valid self-contained ttml file
    * The ttml subs downloaded from DASH manifests can also be directly opened with <https://github.com/SubtitleEdit>
* Fragmented WebVTT files extracted from DASH/ISM are also unsupported by most tools
    * Unlike the ttml files, the XML fragments of these cannot be dumped using `ffmpeg`
    * The webtt subs extracted from DASH can be parsed by <https://github.com/gpac/gpac>
    * But validity of the those extracted from ISM are untested
  • Loading branch information
pukkandan committed Apr 28, 2021
2 parents db9a564 + e8f834c commit be6202f
Show file tree
Hide file tree
Showing 25 changed files with 2,731 additions and 268 deletions.
497 changes: 489 additions & 8 deletions test/test_InfoExtractor.py

Large diffs are not rendered by default.

988 changes: 988 additions & 0 deletions test/testdata/ism/sintel.Manifest

Large diffs are not rendered by default.

38 changes: 38 additions & 0 deletions test/testdata/m3u8/bipbop_16x9.m3u8
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#EXTM3U

#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="bipbop_audio",LANGUAGE="eng",NAME="BipBop Audio 1",AUTOSELECT=YES,DEFAULT=YES
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="bipbop_audio",LANGUAGE="eng",NAME="BipBop Audio 2",AUTOSELECT=NO,DEFAULT=NO,URI="alternate_audio_aac/prog_index.m3u8"


#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English",DEFAULT=YES,AUTOSELECT=YES,FORCED=NO,LANGUAGE="en",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/eng/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="en",URI="subtitles/eng_forced/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Français",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="fr",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/fra/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Français (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="fr",URI="subtitles/fra_forced/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Español",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="es",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/spa/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Español (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="es",URI="subtitles/spa_forced/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="日本語",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="ja",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/jpn/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="日本語 (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="ja",URI="subtitles/jpn_forced/prog_index.m3u8"


#EXT-X-STREAM-INF:BANDWIDTH=263851,CODECS="mp4a.40.2, avc1.4d400d",RESOLUTION=416x234,AUDIO="bipbop_audio",SUBTITLES="subs"
gear1/prog_index.m3u8
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=28451,CODECS="avc1.4d400d",URI="gear1/iframe_index.m3u8"

#EXT-X-STREAM-INF:BANDWIDTH=577610,CODECS="mp4a.40.2, avc1.4d401e",RESOLUTION=640x360,AUDIO="bipbop_audio",SUBTITLES="subs"
gear2/prog_index.m3u8
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=181534,CODECS="avc1.4d401e",URI="gear2/iframe_index.m3u8"

#EXT-X-STREAM-INF:BANDWIDTH=915905,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=960x540,AUDIO="bipbop_audio",SUBTITLES="subs"
gear3/prog_index.m3u8
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=297056,CODECS="avc1.4d401f",URI="gear3/iframe_index.m3u8"

#EXT-X-STREAM-INF:BANDWIDTH=1030138,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=1280x720,AUDIO="bipbop_audio",SUBTITLES="subs"
gear4/prog_index.m3u8
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=339492,CODECS="avc1.4d401f",URI="gear4/iframe_index.m3u8"

#EXT-X-STREAM-INF:BANDWIDTH=1924009,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=1920x1080,AUDIO="bipbop_audio",SUBTITLES="subs"
gear5/prog_index.m3u8
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=669554,CODECS="avc1.4d401f",URI="gear5/iframe_index.m3u8"

#EXT-X-STREAM-INF:BANDWIDTH=41457,CODECS="mp4a.40.2",AUDIO="bipbop_audio",SUBTITLES="subs"
gear0/prog_index.m3u8
351 changes: 351 additions & 0 deletions test/testdata/mpd/subtitles.mpd
Original file line number Diff line number Diff line change
@@ -0,0 +1,351 @@
<?xml version="1.0" encoding="utf-8"?>
<!-- Created with Unified Streaming Platform (version=1.10.18-20255) -->
<MPD
xmlns:xsi="http:https://www.w3.org/2001/XMLSchema-instance"
xmlns="urn:mpeg:dash:schema:mpd:2011"
xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 http:https://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-DASH_schema_files/DASH-MPD.xsd"
type="static"
mediaPresentationDuration="PT14M48S"
maxSegmentDuration="PT1M"
minBufferTime="PT10S"
profiles="urn:mpeg:dash:profile:isoff-live:2011">
<Period
id="1"
duration="PT14M48S">
<BaseURL>dash/</BaseURL>
<AdaptationSet
id="1"
group="1"
contentType="audio"
segmentAlignment="true"
audioSamplingRate="48000"
mimeType="audio/mp4"
codecs="mp4a.40.2"
startWithSAP="1">
<AudioChannelConfiguration
schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011"
value="2" />
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
<SegmentTemplate
timescale="48000"
initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
<SegmentTimeline>
<S t="0" d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="96256" r="2" />
<S d="95232" />
<S d="3584" />
</SegmentTimeline>
</SegmentTemplate>
<Representation
id="audio=128001"
bandwidth="128001">
</Representation>
</AdaptationSet>
<AdaptationSet
id="2"
group="3"
contentType="text"
lang="en"
mimeType="application/mp4"
codecs="stpp"
startWithSAP="1">
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle" />
<SegmentTemplate
timescale="1000"
initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
<SegmentTimeline>
<S t="0" d="60000" r="9" />
<S d="24000" />
</SegmentTimeline>
</SegmentTemplate>
<Representation
id="textstream_eng=1000"
bandwidth="1000">
</Representation>
</AdaptationSet>
<AdaptationSet
id="3"
group="2"
contentType="video"
par="960:409"
minBandwidth="100000"
maxBandwidth="4482000"
maxWidth="1689"
maxHeight="720"
segmentAlignment="true"
mimeType="video/mp4"
codecs="avc1.4D401F"
startWithSAP="1">
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" />
<SegmentTemplate
timescale="12288"
initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash"
media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash">
<SegmentTimeline>
<S t="0" d="24576" r="443" />
</SegmentTimeline>
</SegmentTemplate>
<Representation
id="video=100000"
bandwidth="100000"
width="336"
height="144"
sar="2880:2863"
scanType="progressive">
</Representation>
<Representation
id="video=326000"
bandwidth="326000"
width="562"
height="240"
sar="115200:114929"
scanType="progressive">
</Representation>
<Representation
id="video=698000"
bandwidth="698000"
width="844"
height="360"
sar="86400:86299"
scanType="progressive">
</Representation>
<Representation
id="video=1493000"
bandwidth="1493000"
width="1126"
height="480"
sar="230400:230267"
scanType="progressive">
</Representation>
<Representation
id="video=4482000"
bandwidth="4482000"
width="1688"
height="720"
sar="86400:86299"
scanType="progressive">
</Representation>
</AdaptationSet>
</Period>
</MPD>
Loading

0 comments on commit be6202f

Please sign in to comment.