-
Notifications
You must be signed in to change notification settings - Fork 3
/
download_video_from_list.py
130 lines (108 loc) · 4.16 KB
/
download_video_from_list.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import os
from seleniumwire import webdriver
from selenium.webdriver.chrome.options import Options
from seleniumwire.request import Response
from webdriver_manager.chrome import ChromeDriverManager
import wget
from videoprops import get_video_properties
import multiprocessing as mp
"""
The audio and video streams are downloaded as separate files. To combine them, you need to install and use FFmpeg.
Windows users also need to install the Media Feature Pack so that FFmpeg can run properly.
Find your Windows build version and download the matching Media Feature Pack here:
https://support.microsoft.com/en-us/topic/media-feature-pack-list-for-windows-n-editions-c1c6fffa-d052-8338-7a79-a4bb980a700a
"""
# Destination directory for the downloaded/merged files. File paths are built
# by plain string concatenation (DOWNLOAD_FOLDER + title + extension), so this
# value must end with a path separator.
DOWNLOAD_FOLDER = "D:\\Colorful_Update\\"
# Upper bound on the number of download processes main() keeps running at once.
NUM_PROC = 4
# UTF-8 text file read by main(): one "<title><TAB><url path>" entry per line;
# the url path is appended to "https://www.ixigua.com".
LIST_FILE = "vlist.txt"
def _total_size(content_range):
    """Return the total length encoded in a ``Content-Range`` value.

    The header has the form ``bytes <start>-<end>/<total>``. ``None`` is
    returned when the header is missing or the total is not a plain number
    (for example ``*``), so callers can skip the response instead of crashing.
    """
    if not content_range:
        return None
    total = content_range.rsplit('/', 1)[-1].strip()
    return int(total) if total.isdigit() else None


def _select_streams(requests):
    """Scan captured requests and pick the largest video and audio streams.

    Args:
        requests: iterable of captured request objects exposing ``url`` and
            ``response`` (with a ``headers`` mapping), as provided by
            ``driver.requests``.

    Returns:
        A tuple ``(links, video_url, audio_url)`` where ``links`` maps every
        distinct media URL to ``(media_type, media_size)`` and the two URLs
        are the largest stream of each kind (``None`` if none was seen).
    """
    links = {}
    best = {'video': (None, -1), 'audio': (None, -1)}
    for request in requests:
        response = request.response
        if not response or response.headers['Content-Type'] != 'video/mp4':
            continue
        media_size = _total_size(response.headers['Content-Range'])
        if media_size is None:
            continue
        # Drop the trailing "&net=..." parameter; partition() tolerates URLs
        # that contain the marker zero or several times.
        new_url = request.url.partition('&net=')[0]
        media_type = 'video' if 'media-video-avc1' in new_url else 'audio'
        if media_size > best[media_type][1]:
            best[media_type] = (new_url, media_size)
        links.setdefault(new_url, (media_type, media_size))
    return links, best['video'][0], best['audio'][0]


def _merge_streams(video_path, audio_path, output_path):
    """Mux the video and audio files into ``output_path`` with ffmpeg.

    The command is passed as an argument list (no shell), so quotes or shell
    metacharacters in the paths cannot break or inject into the command line.

    Returns:
        True when ffmpeg exited with status 0, False otherwise (including
        when the ffmpeg executable cannot be started).
    """
    import subprocess

    command = ['ffmpeg', '-i', video_path, '-i', audio_path,
               '-c', 'copy', output_path]
    try:
        return subprocess.run(command, check=False).returncode == 0
    except OSError as e:
        print(e)
        return False


def download_video(title, html_url):
    """Download the best-quality video for one page and merge it with its audio.

    The page is opened in headless Chrome, the network requests captured by
    selenium-wire are scanned for ``video/mp4`` responses, and the largest
    video and audio streams are downloaded and muxed with ffmpeg into
    ``DOWNLOAD_FOLDER + title + '.mp4'`` ('?' characters are stripped from
    the path).

    Args:
        title: base name of the output file.
        html_url: URL of the page that plays the video.

    Returns:
        "Exists" if the output file is already present, otherwise "Done"
        once the attempt has finished. Download and merge failures are
        reported on stdout and do not raise.
    """
    # Strip '?' before the existence check so the path tested is the same
    # path that is eventually written.
    mp4_file = (DOWNLOAD_FOLDER + title + '.mp4').replace('?', '')
    mp4_tmp = (DOWNLOAD_FOLDER + title + '_.mp4').replace('?', '')
    mp3_file = (DOWNLOAD_FOLDER + title + '.mp3').replace('?', '')
    if os.path.exists(mp4_file):
        return "Exists"

    chrome_options = Options()
    chrome_options.add_argument('--headless')
    # NOTE(review): Chrome's switch is normally spelled "--user-agent"; with
    # the underscore this option is probably ignored. Left unchanged because
    # altering the user agent may change which streams the site serves.
    chrome_options.add_argument('--user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0.3 Safari/605.1.15"')
    driver = webdriver.Chrome(executable_path=ChromeDriverManager().install(), options=chrome_options)
    try:
        driver.get(html_url)
        driver.implicitly_wait(9)
        # The result is not needed: with the implicit wait set, this lookup
        # blocks until the comment list appears (or the wait times out),
        # giving the player time to issue its media requests.
        driver.find_elements_by_xpath('//div[@class="commentList"]')
        print(title)
        video_links, max_video_link, max_audio_link = _select_streams(driver.requests)
    finally:
        # quit() (not close()) ends the browser session and its driver
        # process, even when scraping raised.
        driver.quit()

    print(video_links)
    for k, info in video_links.items():
        print("-" * 20)
        print(k)
        print(info)
    print("=" * 20)

    if max_audio_link is None or max_video_link is None:
        # Nothing usable was captured; do not start a download that would
        # leave an orphan audio/video file behind.
        print("Download Error", max_audio_link, max_video_link)
        return "Done"

    try:
        wget.download(max_audio_link, out=mp3_file)
        wget.download(max_video_link, out=mp4_tmp)
    except Exception as e:  # best effort: report and carry on with the batch
        print(e)
        print("Download Error", max_audio_link, max_video_link)

    if os.path.exists(mp4_tmp) and os.path.exists(mp3_file):
        if not _merge_streams(mp4_tmp, mp3_file, mp4_file):
            print("Merge Error", mp4_file)
            # Remove a truncated output so a later run does not report
            # "Exists" for a broken file.
            if os.path.exists(mp4_file):
                os.remove(mp4_file)
        os.remove(mp4_tmp)
        os.remove(mp3_file)
    return "Done"
def _read_video_list(path):
    """Parse the list file into a ``{url: title}`` dict.

    Each non-blank line must hold ``<title><TAB><url path>``; the url path is
    appended to ``https://www.ixigua.com``. Lines that do not contain exactly
    one TAB are reported on stdout and skipped instead of aborting the run.
    """
    jobs = {}
    with open(path, 'r', encoding='utf-8') as f:
        for line_no, line in enumerate(f, start=1):
            if not line.strip():
                continue
            fields = line.split('\t')
            if len(fields) != 2:
                print(f"Skipping malformed line {line_no}: {line.rstrip()!r}")
                continue
            title, url = fields
            jobs['https://www.ixigua.com' + url.strip()] = title.strip()
    return jobs


def _reap_finished(processes):
    """Join every finished process and return the ones still running."""
    alive = []
    for proc in processes:
        if proc.is_alive():
            alive.append(proc)
        else:
            proc.join()
    return alive


def main():
    """Download every video listed in LIST_FILE, at most NUM_PROC at a time.

    One process is started per list entry, each running ``download_video``.
    An empty list file results in no work.
    """
    title_dict = _read_video_list(LIST_FILE)
    print(title_dict)

    # Guard against a non-positive setting, which would never start a job.
    max_procs = max(1, NUM_PROC)
    running = []
    for url, title in title_dict.items():
        while len(running) >= max_procs:
            # Block briefly on the oldest job instead of spinning the CPU,
            # then drop whichever jobs have finished in the meantime.
            running[0].join(timeout=0.5)
            running = _reap_finished(running)
        proc = mp.Process(target=download_video, args=(title, url))
        proc.start()
        running.append(proc)

    for proc in running:
        proc.join()


# The guard keeps child processes from re-running main() when the module is
# re-imported by multiprocessing (as happens with the "spawn" start method).
if __name__ == '__main__':
    main()