增加优酷弹幕下载以及其他改进

This commit is contained in:
xhlove
2020-01-11 17:57:51 +08:00
parent 78083c2295
commit 11edd6dfad
10 changed files with 258 additions and 39 deletions

1
.gitignore vendored
View File

@@ -1,6 +1,7 @@
# 额外 # 额外
.vscode/ .vscode/
releases/ releases/
test/
*.ass *.ass
methods/calc_danmu_pos.py methods/calc_danmu_pos.py

View File

@@ -3,7 +3,7 @@
''' '''
# 作者: weimo # 作者: weimo
# 创建日期: 2020-01-04 19:14:39 # 创建日期: 2020-01-04 19:14:39
# 上次编辑时间: 2020-01-05 14:47:53 # 上次编辑时间 : 2020-01-11 17:49:40
# 一个人的命运啊,当然要靠自我奋斗,但是... # 一个人的命运啊,当然要靠自我奋斗,但是...
''' '''
@@ -13,6 +13,7 @@ from argparse import ArgumentParser
from sites.qq import main as qq from sites.qq import main as qq
from sites.iqiyi import main as iqiyi from sites.iqiyi import main as iqiyi
from sites.youku import main as youku
from pfunc.cfunc import check_url_site from pfunc.cfunc import check_url_site
# ------------------------------------------- # -------------------------------------------
@@ -34,8 +35,9 @@ def main():
parser.add_argument("-vid", "--vid", default="", help="下载vid对应视频的弹幕支持同时多个vid需要用逗号隔开") parser.add_argument("-vid", "--vid", default="", help="下载vid对应视频的弹幕支持同时多个vid需要用逗号隔开")
parser.add_argument("-aid", "--aid", default="", help="下载aid对应视频的弹幕爱奇艺合集") parser.add_argument("-aid", "--aid", default="", help="下载aid对应视频的弹幕爱奇艺合集")
parser.add_argument("-tvid", "--tvid", default="", help="下载tvid对应视频的弹幕支持同时多个tvid需要用逗号隔开") parser.add_argument("-tvid", "--tvid", default="", help="下载tvid对应视频的弹幕支持同时多个tvid需要用逗号隔开")
parser.add_argument("-series", "--series", action="store_true", help="尝试通过单集得到合集的全部弹幕")
parser.add_argument("-u", "--url", default="", help="下载视频链接所指向视频的弹幕") parser.add_argument("-u", "--url", default="", help="下载视频链接所指向视频的弹幕")
parser.add_argument("-y", "--y", action="store_true", help="覆盖原有弹幕而不提示") parser.add_argument("-y", "--y", action="store_false", help="默认覆盖原有弹幕而不提示")
args = parser.parse_args() args = parser.parse_args()
# print(args.__dict__) # print(args.__dict__)
if args.url != "": if args.url != "":
@@ -46,6 +48,8 @@ def main():
subtitles = qq(args) subtitles = qq(args)
if args.site == "iqiyi": if args.site == "iqiyi":
subtitles = iqiyi(args) subtitles = iqiyi(args)
if args.site == "youku":
subtitles = youku(args)
if __name__ == "__main__": if __name__ == "__main__":
# 打包 --> pyinstaller GetDanMu.spec # 打包 --> pyinstaller GetDanMu.spec

View File

@@ -1,7 +1,7 @@
<!-- <!--
* @作者: weimo * @作者: weimo
* @创建日期: 2020-01-04 18:45:58 * @创建日期: 2020-01-04 18:45:58
* @上次编辑时间 : 2020-01-05 14:42:42 * @上次编辑时间 : 2020-01-11 17:48:19
* @一个人的命运啊,当然要靠自我奋斗,但是... * @一个人的命运啊,当然要靠自我奋斗,但是...
--> -->
# GetDanMu # GetDanMu
@@ -13,6 +13,7 @@
| :--: | :-- | :-----: | :-----: | :-----: | | :--: | :-- | :-----: | :-----: | :-----: |
| **腾讯视频** | <https://v.qq.com/> |✓|✓| | | **腾讯视频** | <https://v.qq.com/> |✓|✓| |
| **爱奇艺** | <https://www.iqiyi.com/> |✓|✓|✓| | **爱奇艺** | <https://www.iqiyi.com/> |✓|✓|✓|
| **优酷** | <https://v.youku.com/> |✓|✓|✓|
## 可能存在的问题 ## 可能存在的问题
- 下载进度接近100%时暂时没有反应 - 下载进度接近100%时暂时没有反应
@@ -24,6 +25,12 @@
# 更新日志 # 更新日志
## 2020/1/11
- 增加优酷弹幕下载,支持合集,支持通过单集直接下载合集弹幕(暂时仅限优酷)
- 改进去重方式
- 优酷的视频id用vid指代若下载合集请使用链接或通过`-series`选项下载合集弹幕
- 加入下载进度显示,后续可能改进为单行刷新
## 2020/1/5 ## 2020/1/5
- 增加了通过链接下载爱奇艺视频弹幕的方法,支持综艺合集。 - 增加了通过链接下载爱奇艺视频弹幕的方法,支持综艺合集。

View File

@@ -3,7 +3,7 @@
''' '''
# 作者: weimo # 作者: weimo
# 创建日期: 2020-01-04 19:14:46 # 创建日期: 2020-01-04 19:14:46
# 上次编辑时间: 2020-01-05 14:45:55 # 上次编辑时间 : 2020-01-11 17:20:21
# 一个人的命运啊,当然要靠自我奋斗,但是... # 一个人的命运啊,当然要靠自我奋斗,但是...
''' '''
@@ -62,10 +62,4 @@ def check_font(font):
font_style_name = font font_style_name = font
else: else:
pass pass
return font_path, font_style_name return font_path, font_style_name
def check_content(content: str, comments: list):
    """Normalize a danmu comment and reject duplicates.

    Strips all spaces from *content*; returns the stripped text, or
    None when an identical stripped comment is already in *comments*.
    """
    stripped = content.replace(" ", "")
    return None if stripped in comments else stripped

View File

@@ -3,11 +3,24 @@
''' '''
# 作者: weimo # 作者: weimo
# 创建日期: 2020-01-05 12:45:18 # 创建日期: 2020-01-05 12:45:18
# 上次编辑时间: 2020-01-05 14:44:42 # 上次编辑时间 : 2020-01-11 17:37:22
# 一个人的命运啊,当然要靠自我奋斗,但是... # 一个人的命运啊,当然要靠自我奋斗,但是...
''' '''
import hashlib
from urllib.parse import urlparse from urllib.parse import urlparse
def remove_same_danmu(comments: list):
    """Drop danmu entries whose space-stripped text repeats an earlier one.

    Each entry is ``[content, color, timepoint]``; the returned entries
    carry the space-stripped content.  Building a new list (rather than
    popping in place) avoids index shifts while iterating.

    BUGFIX: the previous version tested ``content in contents`` where
    ``contents`` held whole triples, so a bare string never matched and
    no duplicate was ever removed.  A separate ``seen`` set fixes the
    membership test and makes it O(1) per comment.
    """
    seen = set()
    result = []
    for content, color, timepoint in comments:
        content = content.replace(" ", "")
        if content in seen:
            continue
        seen.add(content)
        result.append([content, color, timepoint])
    return result
def check_url_site(url): def check_url_site(url):
return urlparse(url).netloc.split(".")[-2] return urlparse(url).netloc.split(".")[-2]
@@ -21,4 +34,11 @@ def check_url_locale(url):
if urlparse(url).netloc.split(".")[0] == "tw": if urlparse(url).netloc.split(".")[0] == "tw":
return flag["tw"] return flag["tw"]
else: else:
return flag["cn"] return flag["cn"]
def yk_msg_sign(msg: str):
    """Return the hex MD5 of *msg* concatenated with Youku's fixed app secret."""
    secret = "MkmC9SoIw6xCkSKHhJ7b5D2r51kBiREr"
    return hashlib.md5((msg + secret).encode("utf-8")).hexdigest()
def yk_t_sign(token, t, appkey, data):
    """Return the hex MD5 of token, timestamp, appkey and payload joined by '&'."""
    joined = "%s&%s&%s&%s" % (token, t, appkey, data)
    return hashlib.md5(joined.encode("utf-8")).hexdigest()

View File

@@ -3,7 +3,7 @@
''' '''
# 作者: weimo # 作者: weimo
# 创建日期: 2020-01-04 19:17:44 # 创建日期: 2020-01-04 19:17:44
# 上次编辑时间: 2020-01-05 14:45:03 # 上次编辑时间 : 2020-01-11 17:25:09
# 一个人的命运啊,当然要靠自我奋斗,但是... # 一个人的命运啊,当然要靠自我奋斗,但是...
''' '''
import os import os
@@ -11,16 +11,19 @@ import os
from basic.ass import get_ass_head, check_font from basic.ass import get_ass_head, check_font
from methods.assbase import ASS from methods.assbase import ASS
from methods.sameheight import SameHeight from methods.sameheight import SameHeight
from pfunc.cfunc import remove_same_danmu
def write_one_video_subtitles(file_path, comments, args): def write_one_video_subtitles(file_path, comments, args):
# 对于合集则每次都都得检查一次 也可以放在上一级 放在这里 考虑后面可能特殊指定字体的情况 # 对于合集则每次都都得检查一次 也可以放在上一级 放在这里 考虑后面可能特殊指定字体的情况
font_path, font_style_name = check_font(args.font) font_path, font_style_name = check_font(args.font)
ass_head = get_ass_head(font_style_name, args.font_size) ass_head = get_ass_head(font_style_name, args.font_size)
get_xy_obj = SameHeight("那就写这一句作为初始化测试吧!", font_path=font_path, font_size=args.font_size) get_xy_obj = SameHeight("那就写这一句作为初始化测试吧!", font_path=font_path, font_size=int(args.font_size))
subtitle = ASS(file_path, get_xy_obj, font=font_style_name) subtitle = ASS(file_path, get_xy_obj, font=font_style_name)
comments = remove_same_danmu(comments)
for comment in comments: for comment in comments:
subtitle.create_new_line(comment) subtitle.create_new_line(comment)
write_lines_to_file(ass_head, subtitle.lines, file_path) write_lines_to_file(ass_head, subtitle.lines, file_path)
return comments
def write_lines_to_file(ass_head, lines, file_path): def write_lines_to_file(ass_head, lines, file_path):
with open(file_path, "a+", encoding="utf-8") as f: with open(file_path, "a+", encoding="utf-8") as f:

View File

@@ -3,7 +3,7 @@
''' '''
# 作者: weimo # 作者: weimo
# 创建日期: 2020-01-04 19:14:43 # 创建日期: 2020-01-04 19:14:43
# 上次编辑时间: 2020-01-05 14:47:16 # 上次编辑时间 : 2020-01-11 17:42:30
# 一个人的命运啊,当然要靠自我奋斗,但是... # 一个人的命运啊,当然要靠自我奋斗,但是...
''' '''
import re import re
@@ -192,4 +192,64 @@ def get_vinfos_by_url(url):
else: else:
return get_vinfos(aid, locale=locale) return get_vinfos(aid, locale=locale)
#-------------------------------------------iqiyi-------------------------------------------- #-------------------------------------------iqiyi--------------------------------------------
#-------------------------------------------youku--------------------------------------------
def get_vinfos_by_url_youku(url, isall=False):
    """Resolve a Youku page url to a list of [name, duration, video_id] entries.

    Tries single-video url patterns first, then show/collection patterns.
    Returns None when the url matches nothing; otherwise delegates to
    get_vinfos_by_video_id / get_vinfos_by_show_id (with isall forwarded
    so a single episode can expand to its whole show).

    BUGFIX: the original capture groups were written as ``(/+?)`` (slashes
    only) for the video/id_ and v_nextstage/id_ patterns, so those urls
    could never match; corrected to ``(.+?)``.  Literal dots are now
    escaped as well.
    """
    vid_patterns = [
        r"[\s\S]+?youku\.com/video/id_(.+?)\.html",
        r"[\s\S]+?youku\.com/v_show/id_(.+?)\.html",
    ]
    video_id = matchit(vid_patterns, url)
    show_id_patterns = [
        r"[\s\S]+?youku\.com/v_nextstage/id_(.+?)\.html",
        r"[\s\S]+?youku\.com/show/id_z(.+?)\.html",
        r"[\s\S]+?youku\.com/show_page/id_z(.+?)\.html",
        r"[\s\S]+?youku\.com/alipay_video/id_(.+?)\.html",
    ]
    show_id = matchit(show_id_patterns, url)
    if video_id is None and show_id is None:
        return None
    if video_id:
        return get_vinfos_by_video_id(video_id, isall=isall)
    # A bare 20-char all-lowercase token is a show id; anything else
    # (e.g. ids from alipay_video urls) is treated as a video id.
    if len(show_id) == 20 and show_id == show_id.lower():
        return get_vinfos_by_show_id(show_id)
    return get_vinfos_by_video_id(show_id, isall=isall)
def get_vinfos_by_video_id(video_id, isall=False):
    """Fetch metadata for one Youku video from the open api.

    Returns a list containing a single ``[name, duration, video_id]``
    entry, or — when *isall* is True — the whole show's entries via
    get_vinfos_by_show_id.  Returns None on any failure, matching the
    function's print-and-return-None error convention (the original
    could instead raise on a malformed / error response body).
    """
    api_url = "https://openapi.youku.com/v2/videos/show.json?client_id=53e6cc67237fc59a&package=com.huawei.hwvplayer.youku&ext=show&video_id={}".format(video_id)
    try:
        r = requests.get(api_url, headers=chrome, timeout=5).content.decode("utf-8")
    except Exception as e:
        print("get_vinfos_by_video_id error info -->", e)
        return None
    try:
        data = json.loads(r)
    except ValueError as e:
        # api returned something that is not json (e.g. an error page)
        print("get_vinfos_by_video_id error info -->", e)
        return None
    if isall:
        # expand a single episode into its whole show
        show_id = data.get("show", {}).get("id")
        if show_id is None:
            return None
        return get_vinfos_by_show_id(show_id)
    duration = int(float(data["duration"])) if data.get("duration") else 0
    if data.get("title"):
        name = data["title"] + "_" + str(duration)
    else:
        name = "优酷未知" + "_" + str(duration)
    return [[name, duration, video_id]]
def get_vinfos_by_show_id(show_id):
    """List every main-feature episode of a Youku show.

    Queries the open api for up to 100 "正片" (main feature) videos and
    returns ``[[name, duration, video_id], ...]``; None on request
    failure or when the show has no videos.
    """
    api_url = "https://openapi.youku.com/v2/shows/videos.json?show_videotype=正片&count=100&client_id=53e6cc67237fc59a&page=1&show_id={}&package=com.huawei.hwvplayer.youku".format(show_id)
    try:
        raw = requests.get(api_url, headers=chrome, timeout=5).content.decode("utf-8")
    except Exception as e:
        print("get_vinfos_by_show_id error info -->", e)
        return None
    videos = json.loads(raw)["videos"]
    if not videos:
        return None
    vinfos = []
    for video in videos:
        duration = int(float(video["duration"])) if video.get("duration") else 0
        title = video.get("title")
        if title:
            name = title + "_" + str(duration)
        else:
            name = "优酷未知_{}".format(video["id"]) + "_" + str(duration)
        vinfos.append([name, duration, video["id"]])
    return vinfos
#-------------------------------------------youku--------------------------------------------

View File

@@ -3,7 +3,7 @@
''' '''
# 作者: weimo # 作者: weimo
# 创建日期: 2020-01-04 19:14:41 # 创建日期: 2020-01-04 19:14:41
# 上次编辑时间: 2020-01-05 14:45:17 # 上次编辑时间 : 2020-01-11 17:23:32
# 一个人的命运啊,当然要靠自我奋斗,但是... # 一个人的命运啊,当然要靠自我奋斗,但是...
''' '''
@@ -14,7 +14,6 @@ from zlib import decompress
from xmltodict import parse from xmltodict import parse
from basic.vars import iqiyiplayer from basic.vars import iqiyiplayer
from basic.ass import check_content
from pfunc.dump_to_ass import check_file, write_one_video_subtitles from pfunc.dump_to_ass import check_file, write_one_video_subtitles
from pfunc.request_info import get_vinfos, get_vinfos_by_url, get_vinfo_by_tvid from pfunc.request_info import get_vinfos, get_vinfos_by_url, get_vinfo_by_tvid
@@ -48,7 +47,6 @@ def get_danmu_by_tvid(name, duration, tvid):
continue continue
# with open("raw_xml.json", "w", encoding="utf-8") as f: # with open("raw_xml.json", "w", encoding="utf-8") as f:
# f.write(json.dumps(parse(raw_xml), ensure_ascii=False, indent=4)) # f.write(json.dumps(parse(raw_xml), ensure_ascii=False, indent=4))
contents = []
if entry.__class__ != list: if entry.__class__ != list:
entry = [entry] entry = [entry]
for comment in entry: for comment in entry:
@@ -58,13 +56,8 @@ def get_danmu_by_tvid(name, duration, tvid):
if bulletInfo.__class__ != list: if bulletInfo.__class__ != list:
bulletInfo = [bulletInfo] bulletInfo = [bulletInfo]
for info in bulletInfo: for info in bulletInfo:
content = check_content(info["content"], contents)
if content is None:
continue
else:
contents.append(content)
color = [info["color"]] color = [info["color"]]
comments.append([content, color, int(comment["int"])]) comments.append([info["content"], color, int(comment["int"])])
print("已下载{:.2f}%".format(index * timestamp * 100 / duration)) print("已下载{:.2f}%".format(index * timestamp * 100 / duration))
index += 1 index += 1
comments = sorted(comments, key=lambda _: _[-1]) comments = sorted(comments, key=lambda _: _[-1])
@@ -93,8 +86,8 @@ def main(args):
flag, file_path = check_file(name, skip=args.y) flag, file_path = check_file(name, skip=args.y)
if flag is False: if flag is False:
print("跳过{}".format(name)) print("跳过{}".format(name))
return continue
comments = get_danmu_by_tvid(name, duration, tvid) comments = get_danmu_by_tvid(name, duration, tvid)
write_one_video_subtitles(file_path, comments, args) comments = write_one_video_subtitles(file_path, comments, args)
subtitles.update({file_path:comments}) subtitles.update({file_path:comments})
return subtitles return subtitles

View File

@@ -3,7 +3,7 @@
''' '''
# 作者: weimo # 作者: weimo
# 创建日期: 2020-01-04 19:14:37 # 创建日期: 2020-01-04 19:14:37
# 上次编辑时间: 2020-01-05 14:47:36 # 上次编辑时间 : 2020-01-11 17:25:34
# 一个人的命运啊,当然要靠自我奋斗,但是... # 一个人的命运啊,当然要靠自我奋斗,但是...
''' '''
@@ -13,7 +13,6 @@ import json
import requests import requests
from basic.vars import qqlive from basic.vars import qqlive
from basic.ass import check_content
from pfunc.dump_to_ass import check_file, write_one_video_subtitles from pfunc.dump_to_ass import check_file, write_one_video_subtitles
from pfunc.request_info import get_all_vids_by_cid as get_vids from pfunc.request_info import get_all_vids_by_cid as get_vids
from pfunc.request_info import get_danmu_target_id_by_vid as get_target_id from pfunc.request_info import get_danmu_target_id_by_vid as get_target_id
@@ -80,13 +79,7 @@ def get_danmu_by_target_id(vid: str, duration: int, target_id, font="微软雅
# timestamp不变 再试一次 # timestamp不变 再试一次
continue continue
danmu_count = danmu["count"] danmu_count = danmu["count"]
contents = []
for comment in danmu["comments"]: for comment in danmu["comments"]:
content = check_content(comment["content"], contents)
if content is None:
continue
else:
contents.append(content)
if comment["content_style"]: if comment["content_style"]:
style = json.loads(comment["content_style"]) style = json.loads(comment["content_style"])
if style.get("gradient_colors"): if style.get("gradient_colors"):
@@ -97,7 +90,7 @@ def get_danmu_by_target_id(vid: str, duration: int, target_id, font="微软雅
color = ["ffffff"] color = ["ffffff"]
else: else:
color = ["ffffff"] color = ["ffffff"]
comments.append([content, color, comment["timepoint"]]) comments.append([comment["content"], color, comment["timepoint"]])
print("已下载{:.2f}%".format(params["timestamp"]*100/duration)) print("已下载{:.2f}%".format(params["timestamp"]*100/duration))
params["timestamp"] += 30 params["timestamp"] += 30
comments = sorted(comments, key=lambda _: _[-1]) comments = sorted(comments, key=lambda _: _[-1])
@@ -157,8 +150,11 @@ def main(args):
vinfos = get_video_info_by_vid(vids) vinfos = get_video_info_by_vid(vids)
subtitles = {} subtitles = {}
for vinfo in vinfos: for vinfo in vinfos:
comments, file_path = get_one_subtitle_by_vinfo(vinfo, args.font, args.font_size, args.y) infos = get_one_subtitle_by_vinfo(vinfo, args.font, args.font_size, args.y)
write_one_video_subtitles(file_path, comments, args) if infos is None:
continue
comments, file_path = infos
comments = write_one_video_subtitles(file_path, comments, args)
subtitles.update({file_path:comments}) subtitles.update({file_path:comments})
return subtitles return subtitles

141
sites/youku.py Normal file
View File

@@ -0,0 +1,141 @@
#!/usr/bin/env python3.7
# coding=utf-8
'''
# 作者: weimo
# 创建日期: 2020-01-05 14:52:21
# 上次编辑时间 : 2020-01-11 17:53:14
# 一个人的命运啊,当然要靠自我奋斗,但是...
'''
import re
import time
import json
import base64
import requests
from basic.vars import chrome
from pfunc.dump_to_ass import check_file, write_one_video_subtitles
from pfunc.cfunc import yk_msg_sign, yk_t_sign
from pfunc.request_info import get_vinfos_by_show_id, get_vinfos_by_video_id, get_vinfos_by_url_youku
def get_tk_enc():
    """Fetch Youku's _m_h5_tk / _m_h5_tk_enc token cookies.

    Returns the cookie dict when both tokens are present, otherwise None
    (request failure or missing cookies).
    """
    api_url = "https://acs.youku.com/h5/mtop.com.youku.aplatform.weakget/1.0/?jsv=2.5.1&appKey=24679788"
    try:
        resp = requests.get(api_url, headers=chrome, timeout=5)
    except Exception:
        return
    cookies = dict(resp.cookies)
    if cookies.get("_m_h5_tk_enc") and cookies.get("_m_h5_tk"):
        return cookies
    return
def get_cna():
    """Fetch the 'cna' device guid cookie from mmstat; None on failure."""
    api_url = "https://log.mmstat.com/eg.js"
    try:
        resp = requests.get(api_url, headers=chrome, timeout=5)
    except Exception:
        return
    cna = dict(resp.cookies).get("cna")
    if cna:
        return cna
    return
def get_danmu_by_mat(vid, cna, mat: int, comments: list):
    """Download one minute-segment ("mat") of danmu for video *vid*.

    Appends parsed ``[content, [color_hex], timepoint_seconds]`` entries
    to *comments* in place and returns it.  Returns None when the token
    cookies cannot be obtained, or the string "once again" when the HTTP
    request fails, so the caller may retry.

    The request is double-signed: the msg payload is base64'd and signed
    with yk_msg_sign, and the full body is signed with yk_t_sign using
    the _m_h5_tk token — do not reorder these steps.
    """
    api_url = "https://acs.youku.com/h5/mopen.youku.danmu.list/1.0/"
    tm = str(int(time.time() * 1000))
    msg = {
        "ctime": tm,
        "ctype": 10004,
        "cver": "v1.0",
        "guid": cna,
        "mat": mat,
        "mcount": 1,
        "pid": 0,
        "sver": "3.1.0",
        "type": 1,
        "vid": vid}
    # compact separators: the signature is computed over this exact byte string
    msg_b64encode = base64.b64encode(json.dumps(msg, separators=(',', ':')).encode("utf-8")).decode("utf-8")
    msg.update({"msg":msg_b64encode})
    msg.update({"sign":yk_msg_sign(msg_b64encode)})
    # Testing shows that having only the _m_h5_tk and _m_h5_tk_enc cookies is enough
    tk_enc = get_tk_enc()
    if tk_enc is None:
        return
    headers = {
        "Content-Type":"application/x-www-form-urlencoded",
        "Cookie":";".join([k + "=" + v for k, v in tk_enc.items()]),
        "Referer": "https://v.youku.com"
    }
    headers.update(chrome)
    t = str(int(time.time() * 1000))
    data = json.dumps(msg, separators=(',', ':'))
    params = {
        "jsv":"2.5.6",
        "appKey":"24679788",
        "t":t,
        # first 32 chars of _m_h5_tk are the signing token
        "sign":yk_t_sign(tk_enc["_m_h5_tk"][:32], t, "24679788", data),
        "api":"mopen.youku.danmu.list",
        "v":"1.0",
        "type":"originaljson",
        "dataType":"jsonp",
        "timeout":"20000",
        "jsonpIncPrefix":"utility"
    }
    try:
        r = requests.post(api_url, params=params, data={"data":data}, headers=headers, timeout=5).content.decode("utf-8")
    except Exception as e:
        print("youku danmu request failed.", e)
        return "once again"
    # the response wraps a json string inside json: unwrap twice
    result = json.loads(json.loads(r)["data"]["result"])["data"]["result"]
    for item in result:
        comment = item["content"]
        # "propertis" is the field name as returned by the api (sic)
        c_int = json.loads(item["propertis"])["color"]
        if c_int.__class__ == str:
            c_int = int(c_int)
        # integer color -> 6-digit lowercase hex, zero-padded
        color = hex(c_int)[2:].zfill(6)
        # playat is in milliseconds; converted to seconds
        timepoint = item["playat"] / 1000
        comments.append([comment, [color], timepoint])
    return comments
def main(args):
    """Entry point for Youku danmu download.

    Resolves videos from args.url / args.vid (expanding to the whole
    show when args.series is set), downloads danmu minute by minute and
    writes one .ass file per video.  Returns {file_path: comments} or
    None when the required 'cna' guid cannot be obtained.

    BUGFIX: when a segment request failed twice in a row, the original
    code assigned the "once again" marker string to ``comments``, which
    crashed the next segment's ``comments.append``.  Failed segments are
    now skipped after the retry.
    """
    cna = get_cna()
    if cna is None:
        # bail out early: every danmu request needs the cna guid
        return
    isall = bool(args.series)
    vinfos = []
    if args.url:
        vi = get_vinfos_by_url_youku(args.url, isall=isall)
        if vi:
            vinfos += vi
    if args.vid:
        vi = get_vinfos_by_video_id(args.vid, isall=isall)
        if vi:
            vinfos += vi
    subtitles = {}
    for name, duration, video_id in vinfos:
        print(name, "开始下载...")
        flag, file_path = check_file(name, skip=args.y)
        if flag is False:
            print("跳过{}".format(name))
            continue
        # danmu are served in one-minute buckets, 1-indexed
        max_mat = duration // 60 + 1
        comments = []
        for mat in range(max_mat):
            result = None
            # one retry on transient request failure ("once again")
            for _ in range(2):
                result = get_danmu_by_mat(video_id, cna, mat + 1, comments)
                if result != "once again":
                    break
            if result is None or result == "once again":
                # token failure or repeated request failure: skip this segment
                continue
            comments = result
            print("已下载{}/{}".format(mat + 1, max_mat))
        comments = write_one_video_subtitles(file_path, comments, args)
        subtitles.update({file_path:comments})
    return subtitles