diff --git a/.gitignore b/.gitignore index bb8a233..2058ed2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ # 额外 .vscode/ releases/ +test/ *.ass methods/calc_danmu_pos.py diff --git a/GetDanMu.py b/GetDanMu.py index dd15fe0..40e10eb 100644 --- a/GetDanMu.py +++ b/GetDanMu.py @@ -3,7 +3,7 @@ ''' # 作者: weimo # 创建日期: 2020-01-04 19:14:39 -# 上次编辑时间: 2020-01-05 14:47:53 +# 上次编辑时间 : 2020-01-11 17:49:40 # 一个人的命运啊,当然要靠自我奋斗,但是... ''' @@ -13,6 +13,7 @@ from argparse import ArgumentParser from sites.qq import main as qq from sites.iqiyi import main as iqiyi +from sites.youku import main as youku from pfunc.cfunc import check_url_site # ------------------------------------------- @@ -34,8 +35,9 @@ def main(): parser.add_argument("-vid", "--vid", default="", help="下载vid对应视频的弹幕,支持同时多个vid,需要用逗号隔开") parser.add_argument("-aid", "--aid", default="", help="下载aid对应视频的弹幕(爱奇艺合集)") parser.add_argument("-tvid", "--tvid", default="", help="下载tvid对应视频的弹幕,支持同时多个tvid,需要用逗号隔开") + parser.add_argument("-series", "--series", action="store_true", help="尝试通过单集得到合集的全部弹幕") parser.add_argument("-u", "--url", default="", help="下载视频链接所指向视频的弹幕") - parser.add_argument("-y", "--y", action="store_true", help="覆盖原有弹幕而不提示") + parser.add_argument("-y", "--y", action="store_false", help="默认覆盖原有弹幕而不提示") args = parser.parse_args() # print(args.__dict__) if args.url != "": @@ -46,6 +48,8 @@ def main(): subtitles = qq(args) if args.site == "iqiyi": subtitles = iqiyi(args) + if args.site == "youku": + subtitles = youku(args) if __name__ == "__main__": # 打包 --> pyinstaller GetDanMu.spec diff --git a/README.md b/README.md index 2fde5cc..c5bb54d 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # GetDanMu @@ -13,6 +13,7 @@ | :--: | :-- | :-----: | :-----: | :-----: | | **腾讯视频** | |✓|✓| | | **爱奇艺** | |✓|✓|✓| +| **优酷** | |✓|✓|✓| ## 可能存在的问题 - 下载进度接近100%时暂时没有反应 @@ -24,6 +25,12 @@ # 更新日志 +## 2020/1/11 +- 增加优酷弹幕下载,支持合集,支持通过单集直接下载合集弹幕(暂时仅限优酷) +- 改进去重方式 +- 优酷的视频id用vid指代,若下载合集请使用连接或通过`-series`选项下载合集弹幕 +- 加入下载进度显示,后续可能改进为单行刷新 + ## 2020/1/5 - 增加了通过链接下载爱奇艺视频弹幕的方法,支持综艺合集。 diff --git a/basic/ass.py b/basic/ass.py index f11b57c..13b2c0f 100644 --- a/basic/ass.py +++ b/basic/ass.py @@ -3,7 +3,7 @@ ''' # 作者: weimo # 创建日期: 2020-01-04 19:14:46 -# 上次编辑时间: 2020-01-05 14:45:55 +# 上次编辑时间 : 2020-01-11 17:20:21 # 一个人的命运啊,当然要靠自我奋斗,但是... ''' @@ -62,10 +62,4 @@ def check_font(font): font_style_name = font else: pass - return font_path, font_style_name - -def check_content(content: str, comments: list): - content = content.replace(" ", "") - if content in comments: - return - return content \ No newline at end of file + return font_path, font_style_name \ No newline at end of file diff --git a/pfunc/cfunc.py b/pfunc/cfunc.py index 8029fe7..73e970f 100644 --- a/pfunc/cfunc.py +++ b/pfunc/cfunc.py @@ -3,11 +3,24 @@ ''' # 作者: weimo # 创建日期: 2020-01-05 12:45:18 -# 上次编辑时间: 2020-01-05 14:44:42 +# 上次编辑时间 : 2020-01-11 17:37:22 # 一个人的命运啊,当然要靠自我奋斗,但是... ''' + +import hashlib from urllib.parse import urlparse +def remove_same_danmu(comments: list): + # 在原有基础上pop会引起索引变化 所以还是采用下面这个方式 + contents = [] + for comment in comments: + content, color, timepoint = comment + content = content.replace(" ", "") + if content in contents: + continue + else: + contents.append([content, color, timepoint]) + return contents def check_url_site(url): return urlparse(url).netloc.split(".")[-2] @@ -21,4 +34,11 @@ def check_url_locale(url): if urlparse(url).netloc.split(".")[0] == "tw": return flag["tw"] else: - return flag["cn"] \ No newline at end of file + return flag["cn"] + +def yk_msg_sign(msg: str): + return hashlib.new("md5", bytes(msg + "MkmC9SoIw6xCkSKHhJ7b5D2r51kBiREr", "utf-8")).hexdigest() + +def yk_t_sign(token, t, appkey, data): + text = "&".join([token, t, appkey, data]) + return hashlib.new('md5', bytes(text, 'utf-8')).hexdigest() \ No newline at end of file diff --git a/pfunc/dump_to_ass.py b/pfunc/dump_to_ass.py index bca3411..5af0fb3 100644 --- a/pfunc/dump_to_ass.py +++ b/pfunc/dump_to_ass.py @@ -3,7 +3,7 @@ ''' # 作者: weimo # 创建日期: 2020-01-04 19:17:44 -# 上次编辑时间: 2020-01-05 14:45:03 +# 上次编辑时间 : 2020-01-11 17:25:09 # 一个人的命运啊,当然要靠自我奋斗,但是... ''' import os @@ -11,16 +11,19 @@ import os from basic.ass import get_ass_head, check_font from methods.assbase import ASS from methods.sameheight import SameHeight +from pfunc.cfunc import remove_same_danmu def write_one_video_subtitles(file_path, comments, args): # 对于合集则每次都都得检查一次 也可以放在上一级 放在这里 考虑后面可能特殊指定字体的情况 font_path, font_style_name = check_font(args.font) ass_head = get_ass_head(font_style_name, args.font_size) - get_xy_obj = SameHeight("那就写这一句作为初始化测试吧!", font_path=font_path, font_size=args.font_size) + get_xy_obj = SameHeight("那就写这一句作为初始化测试吧!", font_path=font_path, font_size=int(args.font_size)) subtitle = ASS(file_path, get_xy_obj, font=font_style_name) + comments = remove_same_danmu(comments) for comment in comments: subtitle.create_new_line(comment) write_lines_to_file(ass_head, subtitle.lines, file_path) + return comments def write_lines_to_file(ass_head, lines, file_path): with open(file_path, "a+", encoding="utf-8") as f: diff --git a/pfunc/request_info.py b/pfunc/request_info.py index c2aa539..34a6e28 100644 --- a/pfunc/request_info.py +++ b/pfunc/request_info.py @@ -3,7 +3,7 @@ ''' # 作者: weimo # 创建日期: 2020-01-04 19:14:43 -# 上次编辑时间: 2020-01-05 14:47:16 +# 上次编辑时间 : 2020-01-11 17:42:30 # 一个人的命运啊,当然要靠自我奋斗,但是... ''' import re @@ -192,4 +192,64 @@ def get_vinfos_by_url(url): else: return get_vinfos(aid, locale=locale) -#-------------------------------------------iqiyi-------------------------------------------- \ No newline at end of file +#-------------------------------------------iqiyi-------------------------------------------- + +#-------------------------------------------youku-------------------------------------------- + +def get_vinfos_by_url_youku(url, isall=False): + vid_patterns = ["[\s\S]+?youku.com/video/id_(/+?)\.html", "[\s\S]+?youku.com/v_show/id_(.+?)\.html"] + video_id = matchit(vid_patterns, url) + show_id_patterns = ["[\s\S]+?youku.com/v_nextstage/id_(/+?)\.html", "[\s\S]+?youku.com/show/id_z(.+?)\.html", "[\s\S]+?youku.com/show_page/id_z(.+?)\.html", "[\s\S]+?youku.com/alipay_video/id_(.+?)\.html"] + show_id = matchit(show_id_patterns, url) + if video_id is None and show_id is None: + return None + if video_id: + return get_vinfos_by_video_id(video_id, isall=isall) + if show_id.__len__() == 20 and show_id == show_id.lower(): + return get_vinfos_by_show_id(show_id) + else: + return get_vinfos_by_video_id(show_id, isall=isall) + +def get_vinfos_by_video_id(video_id, isall=False): + api_url = "https://openapi.youku.com/v2/videos/show.json?client_id=53e6cc67237fc59a&package=com.huawei.hwvplayer.youku&ext=show&video_id={}".format(video_id) + try: + r = requests.get(api_url, headers=chrome, timeout=5).content.decode("utf-8") + except Exception as e: + print("get_vinfos_by_video_id error info -->", e) + return None + data = json.loads(r) + if isall: + show_id = data["show"]["id"] + return get_vinfos_by_show_id(show_id) + duration = 0 + if data.get("duration"): + duration = int(float(data["duration"])) + if data.get("title"): + name = data["title"] + "_" + str(duration) + else: + name = "优酷未知" + "_" + str(duration) + vinfo = [name, duration, video_id] + return [vinfo] + +def get_vinfos_by_show_id(show_id): + api_url = "https://openapi.youku.com/v2/shows/videos.json?show_videotype=正片&count=100&client_id=53e6cc67237fc59a&page=1&show_id={}&package=com.huawei.hwvplayer.youku".format(show_id) + try: + r = requests.get(api_url, headers=chrome, timeout=5).content.decode("utf-8") + except Exception as e: + print("get_vinfos_by_show_id error info -->", e) + return None + data = json.loads(r)["videos"] + if data.__len__() == 0: + return None + vinfos = [] + for video in data: + duration = 0 + if video.get("duration"): + duration = int(float(video["duration"])) + if video.get("title"): + name = video["title"] + "_" + str(duration) + else: + name = "优酷未知_{}".format(video["id"]) + "_" + str(duration) + vinfos.append([name, duration, video["id"]]) + return vinfos +#-------------------------------------------youku-------------------------------------------- \ No newline at end of file diff --git a/sites/iqiyi.py b/sites/iqiyi.py index 9050ade..e4c6236 100644 --- a/sites/iqiyi.py +++ b/sites/iqiyi.py @@ -3,7 +3,7 @@ ''' # 作者: weimo # 创建日期: 2020-01-04 19:14:41 -# 上次编辑时间: 2020-01-05 14:45:17 +# 上次编辑时间 : 2020-01-11 17:23:32 # 一个人的命运啊,当然要靠自我奋斗,但是... ''' @@ -14,7 +14,6 @@ from zlib import decompress from xmltodict import parse from basic.vars import iqiyiplayer -from basic.ass import check_content from pfunc.dump_to_ass import check_file, write_one_video_subtitles from pfunc.request_info import get_vinfos, get_vinfos_by_url, get_vinfo_by_tvid @@ -48,7 +47,6 @@ def get_danmu_by_tvid(name, duration, tvid): continue # with open("raw_xml.json", "w", encoding="utf-8") as f: # f.write(json.dumps(parse(raw_xml), ensure_ascii=False, indent=4)) - contents = [] if entry.__class__ != list: entry = [entry] for comment in entry: @@ -58,13 +56,8 @@ def get_danmu_by_tvid(name, duration, tvid): if bulletInfo.__class__ != list: bulletInfo = [bulletInfo] for info in bulletInfo: - content = check_content(info["content"], contents) - if content is None: - continue - else: - contents.append(content) color = [info["color"]] - comments.append([content, color, int(comment["int"])]) + comments.append([info["content"], color, int(comment["int"])]) print("已下载{:.2f}%".format(index * timestamp * 100 / duration)) index += 1 comments = sorted(comments, key=lambda _: _[-1]) @@ -93,8 +86,8 @@ def main(args): flag, file_path = check_file(name, skip=args.y) if flag is False: print("跳过{}".format(name)) - return + continue comments = get_danmu_by_tvid(name, duration, tvid) - write_one_video_subtitles(file_path, comments, args) + comments = write_one_video_subtitles(file_path, comments, args) subtitles.update({file_path:comments}) return subtitles \ No newline at end of file diff --git a/sites/qq.py b/sites/qq.py index 617c70b..0930226 100644 --- a/sites/qq.py +++ b/sites/qq.py @@ -3,7 +3,7 @@ ''' # 作者: weimo # 创建日期: 2020-01-04 19:14:37 -# 上次编辑时间: 2020-01-05 14:47:36 +# 上次编辑时间 : 2020-01-11 17:25:34 # 一个人的命运啊,当然要靠自我奋斗,但是... ''' @@ -13,7 +13,6 @@ import json import requests from basic.vars import qqlive -from basic.ass import check_content from pfunc.dump_to_ass import check_file, write_one_video_subtitles from pfunc.request_info import get_all_vids_by_cid as get_vids from pfunc.request_info import get_danmu_target_id_by_vid as get_target_id @@ -80,13 +79,7 @@ def get_danmu_by_target_id(vid: str, duration: int, target_id, font="微软雅 # timestamp不变 再试一次 continue danmu_count = danmu["count"] - contents = [] for comment in danmu["comments"]: - content = check_content(comment["content"], contents) - if content is None: - continue - else: - contents.append(content) if comment["content_style"]: style = json.loads(comment["content_style"]) if style.get("gradient_colors"): @@ -97,7 +90,7 @@ def get_danmu_by_target_id(vid: str, duration: int, target_id, font="微软雅 color = ["ffffff"] else: color = ["ffffff"] - comments.append([content, color, comment["timepoint"]]) + comments.append([comment["content"], color, comment["timepoint"]]) print("已下载{:.2f}%".format(params["timestamp"]*100/duration)) params["timestamp"] += 30 comments = sorted(comments, key=lambda _: _[-1]) @@ -157,8 +150,11 @@ def main(args): vinfos = get_video_info_by_vid(vids) subtitles = {} for vinfo in vinfos: - comments, file_path = get_one_subtitle_by_vinfo(vinfo, args.font, args.font_size, args.y) - write_one_video_subtitles(file_path, comments, args) + infos = get_one_subtitle_by_vinfo(vinfo, args.font, args.font_size, args.y) + if infos is None: + continue + comments, file_path = infos + comments = write_one_video_subtitles(file_path, comments, args) subtitles.update({file_path:comments}) return subtitles diff --git a/sites/youku.py b/sites/youku.py new file mode 100644 index 0000000..e1f18b6 --- /dev/null +++ b/sites/youku.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3.7 +# coding=utf-8 +''' +# 作者: weimo +# 创建日期: 2020-01-05 14:52:21 +# 上次编辑时间 : 2020-01-11 17:53:14 +# 一个人的命运啊,当然要靠自我奋斗,但是... +''' +import re +import time +import json +import base64 +import requests + +from basic.vars import chrome +from pfunc.dump_to_ass import check_file, write_one_video_subtitles +from pfunc.cfunc import yk_msg_sign, yk_t_sign +from pfunc.request_info import get_vinfos_by_show_id, get_vinfos_by_video_id, get_vinfos_by_url_youku + +def get_tk_enc(): + """ + 获取优酷的_m_h5_tk和_m_h5_tk_enc + """ + api_url = "https://acs.youku.com/h5/mtop.com.youku.aplatform.weakget/1.0/?jsv=2.5.1&appKey=24679788" + try: + r = requests.get(api_url, headers=chrome, timeout=5) + except Exception as e: + return + tk_enc = dict(r.cookies) + if tk_enc.get("_m_h5_tk_enc") and tk_enc.get("_m_h5_tk"): + return tk_enc + return + +def get_cna(): + api_url = "https://log.mmstat.com/eg.js" + try: + r = requests.get(api_url, headers=chrome, timeout=5) + except Exception as e: + return + cookies = dict(r.cookies) + if cookies.get("cna"): + return cookies["cna"] + return + +def get_danmu_by_mat(vid, cna, mat: int, comments: list): + api_url = "https://acs.youku.com/h5/mopen.youku.danmu.list/1.0/" + tm = str(int(time.time() * 1000)) + msg = { + "ctime": tm, + "ctype": 10004, + "cver": "v1.0", + "guid": cna, + "mat": mat, + "mcount": 1, + "pid": 0, + "sver": "3.1.0", + "type": 1, + "vid": vid} + msg_b64encode = base64.b64encode(json.dumps(msg, separators=(',', ':')).encode("utf-8")).decode("utf-8") + msg.update({"msg":msg_b64encode}) + msg.update({"sign":yk_msg_sign(msg_b64encode)}) + # 测试发现只要有Cookie的_m_h5_tk和_m_h5_tk_enc就行 + tk_enc = get_tk_enc() + if tk_enc is None: + return + headers = { + "Content-Type":"application/x-www-form-urlencoded", + "Cookie":";".join([k + "=" + v for k, v in tk_enc.items()]), + "Referer": "https://v.youku.com" + } + headers.update(chrome) + t = str(int(time.time() * 1000)) + data = json.dumps(msg, separators=(',', ':')) + params = { + "jsv":"2.5.6", + "appKey":"24679788", + "t":t, + "sign":yk_t_sign(tk_enc["_m_h5_tk"][:32], t, "24679788", data), + "api":"mopen.youku.danmu.list", + "v":"1.0", + "type":"originaljson", + "dataType":"jsonp", + "timeout":"20000", + "jsonpIncPrefix":"utility" + } + try: + r = requests.post(api_url, params=params, data={"data":data}, headers=headers, timeout=5).content.decode("utf-8") + except Exception as e: + print("youku danmu request failed.", e) + return "once again" + result = json.loads(json.loads(r)["data"]["result"])["data"]["result"] + for item in result: + comment = item["content"] + c_int = json.loads(item["propertis"])["color"] + if c_int.__class__ == str: + c_int = int(c_int) + color = hex(c_int)[2:].zfill(6) + timepoint = item["playat"] / 1000 + comments.append([comment, [color], timepoint]) + return comments + +def main(args): + cna = get_cna() + if cna is None: + # 放前面 免得做无用功 + return + isall = False + if args.series: + isall = True + vinfos = [] + if args.url: + vi = get_vinfos_by_url_youku(args.url, isall=isall) + if vi: + vinfos += vi + if args.vid: + vi = get_vinfos_by_video_id(args.vid, isall=isall) + if vi: + vinfos += vi + subtitles = {} + for name, duration, video_id in vinfos: + print(name, "开始下载...") + flag, file_path = check_file(name, skip=args.y) + if flag is False: + print("跳过{}".format(name)) + continue + max_mat = duration // 60 + 1 + comments = [] + for mat in range(max_mat): + result = get_danmu_by_mat(video_id, cna, mat + 1, comments) + if result is None: + continue + elif result == "once again": + # 可能改成while好点 + result = get_danmu_by_mat(video_id, cna, mat + 1, comments) + if result is None: + continue + comments = result + print("已下载{}/{}".format(mat + 1, max_mat)) + comments = write_one_video_subtitles(file_path, comments, args) + subtitles.update({file_path:comments}) + return subtitles \ No newline at end of file