From 986ec2b9fe3f39cdaf01b7ff20dcd274d1538a5e Mon Sep 17 00:00:00 2001
From: xhlove
Date: Thu, 16 Jan 2020 20:12:07 +0800
Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=90=9C=E7=8B=90=E8=A7=86?=
 =?UTF-8?q?=E9=A2=91=E5=BC=B9=E5=B9=95=E4=B8=8B=E8=BD=BD=E5=B9=B6=E6=94=B9?=
 =?UTF-8?q?=E8=BF=9B=E8=BE=93=E5=85=A5=E6=8F=90=E7=A4=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 GetDanMu.py           |  26 +++++-
 README.md             |  19 ++--
 basic/vars.py         |   5 +-
 pfunc/cfunc.py        |  10 ++-
 pfunc/dump_to_ass.py  |  10 ++-
 pfunc/request_info.py |  24 ++++-
 sites/iqiyi.py        |   4 +-
 sites/qq.py           |  31 +++++--
 sites/sohu.py         | 199 ++++++++++++++++++++++++++++++++++++++++++
 sites/youku.py        |   4 +-
 10 files changed, 302 insertions(+), 30 deletions(-)
 create mode 100644 sites/sohu.py

diff --git a/GetDanMu.py b/GetDanMu.py
index cffe95a..7b96f2b 100644
--- a/GetDanMu.py
+++ b/GetDanMu.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:14:39
-# 上次编辑时间 : 2020-01-11 18:40:49
+# 上次编辑时间 : 2020-01-16 19:24:10
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 
@@ -14,6 +14,7 @@ from argparse import ArgumentParser
 from sites.qq import main as qq
 from sites.iqiyi import main as iqiyi
 from sites.youku import main as youku
+from sites.sohu import main as sohu
 from pfunc.cfunc import check_url_site
 
 # -------------------------------------------
@@ -37,19 +38,36 @@ def main():
     parser.add_argument("-tvid", "--tvid", default="", help="下载tvid对应视频的弹幕,支持同时多个tvid,需要用逗号隔开")
     parser.add_argument("-series", "--series", action="store_true", help="尝试通过单集得到合集的全部弹幕")
     parser.add_argument("-u", "--url", default="", help="下载视频链接所指向视频的弹幕")
-    parser.add_argument("-y", "--y", action="store_false", help="默认覆盖原有弹幕而不提示")
+    parser.add_argument("-y", "--y", action="store_true", help="覆盖已存在的弹幕文件而不提示")
     args = parser.parse_args()
     # print(args.__dict__)
+    init_args = sys.argv
+    imode = "command_line"
+    if init_args.__len__() == 1:
+        # 双击运行或命令执行exe文件时 传入参数只有exe的路径
+        # 命令行下执行会传入exe的相对路径(在exe所在路径执行时) 传入完整路径(非exe所在路径下执行)
+        # 双击运行exe传入完整路径
+        imode = "non_command_line"
+    if imode == "non_command_line":
+        content = input("请输入链接:\n")
+        check_tip = check_url_site(content)
+        if check_tip is None:
+            sys.exit("不支持的网站")
+        args.url = content
+        args.site = check_tip
+    # 要么有url 要么有site和相关参数的组合
     if args.url != "":
         args.site = check_url_site(args.url)
-    if args.site == "":
-        args.site = input("请输入站点(qq/iqiyi/youku):\n")
+    elif args.site == "":
+        sys.exit("请传入链接或指定网站+视频相关的参数")
     if args.site == "qq":
         subtitles = qq(args)
     if args.site == "iqiyi":
         subtitles = iqiyi(args)
     if args.site == "youku":
         subtitles = youku(args)
+    if args.site == "sohu":
+        subtitles = sohu(args)
 
 if __name__ == "__main__":
     # 打包 --> pyinstaller GetDanMu.spec
diff --git a/README.md b/README.md
index f307414..04864ee 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,7 @@
-
 # GetDanMu
-转换/下载各类视频弹幕的工具
+[转换/下载各类视频弹幕的工具][1]
+
 项目主页:https://github.com/xhlove/GetDanMu
 
 ## 网站支持
@@ -15,6 +10,7 @@
 | **腾讯视频** | |✓|✓| |
 | **爱奇艺** | |✓|✓|✓|
 | **优酷** | |✓|✓|✓|
+| **搜狐视频** | |✓|✓| |
 
 ## 可能存在的问题
 - 下载进度接近100%时暂时没有反应
@@ -26,6 +22,11 @@
 # 更新日志
 
+## 2020/1/16
+- 增加搜狐视频的支持(剧集)
+- 改进输入提示(双击运行时)
+- 腾讯视频支持 -series 参数(尝试通过单集得到合集弹幕)
+
 ## 2020/1/11
 - 增加优酷弹幕下载,支持合集,支持通过单集直接下载合集弹幕(暂时仅限优酷)
 - 改进去重方式
@@ -35,4 +36,6 @@
 
 ## 2020/1/5
 - 增加了通过链接下载爱奇艺视频弹幕的方法,支持综艺合集。
-- 增加通过链接判断网站
\ No newline at end of file
+- 增加通过链接判断网站
+
+  [1]: https://blog.weimo.info/archives/431/
\ No newline at end of file
diff --git a/basic/vars.py b/basic/vars.py
index 7ecec75..c95bfbd 100644
--- a/basic/vars.py
+++ b/basic/vars.py
@@ -3,9 +3,12 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:14:35
-# 上次编辑时间: 2020-01-05 14:46:15
+# 上次编辑时间 : 2020-01-16 19:10:06
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
+
+ALLOW_SITES = ["qq", "iqiyi", "youku", "sohu"]
+
 qqlive = {
     "User-Agent":"qqlive"
     }
diff --git a/pfunc/cfunc.py b/pfunc/cfunc.py
index 73e970f..f099fff 100644
--- a/pfunc/cfunc.py
+++ b/pfunc/cfunc.py
@@ -3,13 +3,15 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-05 12:45:18
-# 上次编辑时间 : 2020-01-11 17:37:22
+# 上次编辑时间 : 2020-01-16 14:50:34
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 
 import hashlib
 from urllib.parse import urlparse
 
+from basic.vars import ALLOW_SITES
+
 def remove_same_danmu(comments: list):
     # 在原有基础上pop会引起索引变化 所以还是采用下面这个方式
     contents = []
@@ -23,7 +25,11 @@ def remove_same_danmu(comments: list):
     return contents
 
 def check_url_site(url):
-    return urlparse(url).netloc.split(".")[-2]
+    site = urlparse(url).netloc.split(".")[-2]
+    if site in ALLOW_SITES:
+        return site
+    else:
+        return None
 
 def check_url_locale(url):
     flag = {
diff --git a/pfunc/dump_to_ass.py b/pfunc/dump_to_ass.py
index 5af0fb3..61f9fd8 100644
--- a/pfunc/dump_to_ass.py
+++ b/pfunc/dump_to_ass.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:17:44
-# 上次编辑时间 : 2020-01-11 17:25:09
+# 上次编辑时间 : 2020-01-16 20:06:23
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 import os
@@ -31,12 +31,16 @@ def write_lines_to_file(ass_head, lines, file_path):
         for line in lines:
             f.write(line + "\n")
 
-def check_file(name, skip=False, fpath=os.getcwd()):
+def check_file(name, args, fpath=os.getcwd()):
     flag = True
     file_path = os.path.join(fpath, name + ".ass")
     if os.path.isfile(file_path):
-        if skip:
+        if args.y:
             os.remove(file_path)
+        elif args.series:
+            # 存在重复的 那么直接pass(认为已经下载好了)
+            flag = False
+            return flag, file_path
         else:
             isremove = input("{}已存在,是否覆盖?(y/n):".format(file_path))
             if isremove.strip() == "y":
diff --git a/pfunc/request_info.py b/pfunc/request_info.py
index 34a6e28..3a360ac 100644
--- a/pfunc/request_info.py
+++ b/pfunc/request_info.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:14:43
-# 上次编辑时间 : 2020-01-11 17:42:30
+# 上次编辑时间 : 2020-01-16 19:44:55
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 import re
@@ -40,6 +40,28 @@ def get_all_vids_by_column_id():
     # 综艺类型的
     pass
 
+def get_cid_by_vid(vid):
+    api_url = "http://union.video.qq.com/fcgi-bin/data"
+    params = {
+        "tid": "98",
+        "appid": "10001005",
+        "appkey": "0d1a9ddd94de871b",
+        "idlist": vid,
+        "otype":"json"
+    }
+    r = requests.get(api_url, params=params, headers=qqlive).content.decode("utf-8")
+    data = json.loads(r.lstrip("QZOutputJson=").rstrip(";"))
+    try:
+        fields = data["results"][0]["fields"]
+    except Exception as e:
+        print("load fields error info -->", e)
+        return None
+    if fields.get("sync_cover"):
+        return fields["sync_cover"]
+    elif fields.get("cover_list"):
+        return fields["cover_list"][0]
+    return
+
 def get_all_vids_by_cid(cid):
     api_url = "http://union.video.qq.com/fcgi-bin/data"
     params = {
diff --git a/sites/iqiyi.py b/sites/iqiyi.py
index e4c6236..d928692 100644
--- a/sites/iqiyi.py
+++ b/sites/iqiyi.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:14:41
-# 上次编辑时间 : 2020-01-11 17:23:32
+# 上次编辑时间 : 2020-01-16 19:58:51
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 
@@ -83,7 +83,7 @@ def main(args):
     subtitles = {}
     for name, duration, tvid in vinfos:
         print(name, "开始下载...")
-        flag, file_path = check_file(name, skip=args.y)
+        flag, file_path = check_file(name, args)
         if flag is False:
             print("跳过{}".format(name))
             continue
diff --git a/sites/qq.py b/sites/qq.py
index 0930226..3e291f5 100644
--- a/sites/qq.py
+++ b/sites/qq.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:14:37
-# 上次编辑时间 : 2020-01-11 17:25:34
+# 上次编辑时间 : 2020-01-16 20:04:51
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 
@@ -14,6 +14,7 @@ import requests
 
 from basic.vars import qqlive
 from pfunc.dump_to_ass import check_file, write_one_video_subtitles
+from pfunc.request_info import get_cid_by_vid
 from pfunc.request_info import get_all_vids_by_cid as get_vids
 from pfunc.request_info import get_danmu_target_id_by_vid as get_target_id
 
@@ -97,10 +98,10 @@ def get_danmu_by_target_id(vid: str, duration: int, target_id, font="微软雅黑", font_size=25):
 
     return comments
 
-def get_one_subtitle_by_vinfo(vinfo, font="微软雅黑", font_size=25, skip=False):
+def get_one_subtitle_by_vinfo(vinfo, font="微软雅黑", font_size=25, args=""):
     vid, name, duration, target_id = vinfo
     print(name, "开始下载...")
-    flag, file_path = check_file(name, skip=skip)
+    flag, file_path = check_file(name, args)
     if flag is False:
         print("跳过{}".format(name))
         return
@@ -108,7 +109,7 @@ def get_one_subtitle_by_vinfo(vinfo, font="微软雅黑", font_size=25, skip=Fal
     # print("{}弹幕下载完成!".format(name))
     return comments, file_path
 
-def ask_input(url=""):
+def ask_input(url="", isall=False):
     if url == "":
         url = input("请输入vid/coverid/链接,输入q退出:\n").strip()
         if url == "q" or url == "":
@@ -117,6 +118,9 @@ def ask_input(url=""):
     params = url.replace(".html", "").split("/")
     if params[-1].__len__() == 11:
         vids = [params[-1]]
+        if isall:
+            cid = get_cid_by_vid(params[-1])
+            vids += get_vids(cid)
     elif params[-1].__len__() == 15:
         cid = params[-1]
         vids = get_vids(cid)
@@ -132,6 +136,9 @@ def ask_input(url=""):
 
 def main(args):
     vids = []
+    isall = False
+    if args.series:
+        isall = True
     if args.cid and args.cid.__len__() == 15:
         vids += get_vids(args.cid)
     if args.vid:
@@ -141,16 +148,26 @@ def main(args):
             vids += [vid for vid in args.vid.strip().replace(" ", "").split(",") if vid.__len__() == 11]
         else:
             pass
+        if args.series:
+            cid = get_cid_by_vid(args.vid)
+            vids += get_vids(cid)
     if args.url:
-        vids += ask_input(url=args.url)
+        vids += ask_input(url=args.url, isall=isall)
     if args.vid == "" and args.cid == "" and args.url == "":
-        vids += ask_input()
+        vids += ask_input(isall=isall)
     if vids.__len__() <= 0:
         sys.exit("没有任何有效输入")
+    vids_bak = vids
+    vids = []
+    for vid in vids_bak:
+        if vid in vids:
+            continue
+        else:
+            vids.append(vid)
     vinfos = get_video_info_by_vid(vids)
     subtitles = {}
     for vinfo in vinfos:
-        infos = get_one_subtitle_by_vinfo(vinfo, args.font, args.font_size, args.y)
+        infos = get_one_subtitle_by_vinfo(vinfo, args.font, args.font_size, args=args)
         if infos is None:
             continue
         comments, file_path = infos
diff --git a/sites/sohu.py b/sites/sohu.py
new file mode 100644
index 0000000..68d4f18
--- /dev/null
+++ b/sites/sohu.py
@@ -0,0 +1,199 @@
+#!/usr/bin/env python3.7
+# coding=utf-8
+'''
+# 作者: weimo
+# 创建日期: 2020-01-16 17:45:35
+# 上次编辑时间 : 2020-01-16 20:09:22
+# 一个人的命运啊,当然要靠自我奋斗,但是...
+'''
+import json
+import requests
+
+from basic.vars import chrome
+from pfunc.request_info import matchit
+from pfunc.dump_to_ass import check_file, write_one_video_subtitles
+
+def try_decode(content):
+    flag = False
+    methods = ["gbk", "utf-8"]
+    for method in methods:
+        try:
+            content_decode = content.decode(method)
+        except Exception as e:
+            print("try {} decode method failed.".format(method))
+            continue
+        flag = True
+        break
+    if flag is True:
+        return content_decode
+    else:
+        return None
+
+def get_vinfos_by_url(url: str):
+    ep_url = matchit(["[\s\S]+?tv.sohu.com/v/(.+?)\.html", "[\s\S]+?tv.sohu.com/(.+?)/(.+?)\.html"], url)
+    aid_url = matchit(["[\s\S]+?tv.sohu.com/album/.(\d+)\.shtml"], url)
+    vid_url = matchit(["[\s\S]+?tv.sohu.com/v(\d+)\.shtml"], url)
+    if ep_url:
+        try:
+            r = requests.get(url, headers=chrome, timeout=3).content
+        except Exception as e:
+            print(e)
+            print("get sohu (url -> {}) ep url failed.".format(url))
+            return
+        r_decode = try_decode(r)
+        if r_decode is None:
+            print("ep response use decode failed(url -> {}).".format(url))
+            return None
+        vid = matchit(["[\s\S]+?var vid.+?(\d+)"], r_decode)
+        if vid:
+            vinfo = get_vinfo_by_vid(vid)
+            if vinfo is None:
+                return
+            else:
+                return [vinfo]
+        else:
+            print("match sohu vid (url -> {}) failed.".format(url))
+            return None
+    if aid_url:
+        return get_vinfos(aid_url)
+    if vid_url:
+        vinfo = get_vinfo_by_vid(vid_url)
+        if vinfo is None:
+            return
+        else:
+            return [vinfo]
+    if ep_url is None and aid_url is None and vid_url is None:
+        # 可能是合集页面
+        try:
+            r = requests.get(url, headers=chrome, timeout=3).content
+        except Exception as e:
+            print("get sohu (url -> {}) album url failed.".format(url))
+            return
+        r_decode = try_decode(r)
+        if r_decode is None:
+            print("album response decode failed(url -> {}).".format(url))
+            return None
+        aid = matchit(["[\s\S]+?var playlistId.+?(\d+)"], r_decode)
+        if aid:
+            return get_vinfos(aid)
+    return
+
+
+def get_vinfos(aid: str):
+    api_url = "https://pl.hd.sohu.com/videolist"
+    params = {
+        "callback": "",
+        "playlistid": aid,
+        "o_playlistId": "",
+        "pianhua": "0",
+        "pagenum": "1",
+        "pagesize": "999",
+        "order": "0", # 0 从小到大
+        "cnt": "1",
+        "pageRule": "2",
+        "withPgcVideo": "0",
+        "ssl": "0",
+        "preVideoRule": "3",
+        "_": "" # 1579167883430
+    }
+    try:
+        r = requests.get(api_url, params=params, headers=chrome, timeout=3).content.decode("gbk")
+    except Exception as e:
+        print("get sohu (aid -> {}) videolist failed.".format(aid))
+        return None
+    data = json.loads(r)
+    if data.get("videos"):
+        videos = data["videos"]
+    else:
+        print("videolist has no videos (aid -> {}).".format(aid))
+        return None
+    vinfos = [[video["name"], int(float(video["playLength"])), video["vid"], aid] for video in videos]
+    return vinfos
+
+
+def get_vinfo_by_vid(vid: str):
+    api_url = "https://hot.vrs.sohu.com/vrs_flash.action"
+    params = {
+        "vid": vid,
+        "ver": "31",
+        "ssl": "1",
+        "pflag": "pch5"
+    }
+    try:
+        r = requests.get(api_url, params=params, headers=chrome, timeout=3).content.decode("utf-8")
+    except Exception as e:
+        print("get sohu (vid -> {}) vinfo failed.".format(vid))
+        return None
+    data = json.loads(r)
+    if data.get("status") == 1:
+        aid = ""
+        if data.get("pid"):
+            aid = str(data["pid"])
+        if data.get("data"):
+            data = data["data"]
+        else:
+            print("vid -> {} vinfo request return no data.".format(vid))
+            return
+    else:
+        print("vid -> {} vinfo request return error.".format(vid))
+        return
+    return [data["tvName"], int(float(data["totalDuration"])), vid, aid]
+
+def get_danmu_all_by_vid(vid: str, aid: str, duration: int):
+    api_url = "https://api.danmu.tv.sohu.com/dmh5/dmListAll"
+    params = {
+        "act": "dmlist_v2",
+        "dct": "1",
+        "request_from": "h5_js",
+        "vid": vid,
+        "page": "1",
+        "pct": "2",
+        "from": "PlayerType.SOHU_VRS",
+        "o": "4",
+        "aid": aid,
+        "time_begin": "0",
+        "time_end": str(duration)
+    }
+    try:
+        r = requests.get(api_url, params=params, headers=chrome, timeout=3).content.decode("utf-8")
+    except Exception as e:
+        print("get sohu (vid -> {}) danmu failed.".format(vid))
+        return None
+    data = json.loads(r)["info"]["comments"]
+    comments = []
+    for comment in data:
+        comments.append([comment["c"], "ffffff", comment["v"]])
+    comments = sorted(comments, key=lambda _: _[-1])
+    return comments
+
+def main(args):
+    vinfos = []
+    if args.vid:
+        vi = get_vinfo_by_vid(args.vid)
+        if vi:
+            vinfos.append(vi)
+    if args.aid:
+        vi = get_vinfos(args.aid)
+        if vi:
+            vinfos += vi
+    if args.vid == "" and args.aid == "" and args.url == "":
+        args.url = input("请输入sohu链接:\n")
+    if args.url:
+        vi = get_vinfos_by_url(args.url)
+        if vi:
+            vinfos += vi
+    subtitles = {}
+    for name, duration, vid, aid in vinfos:
+        print(name, "开始下载...")
+        flag, file_path = check_file(name, args)
+        if flag is False:
+            print("跳过{}".format(name))
+            continue
+        comments = get_danmu_all_by_vid(vid, aid, duration)
+        if comments is None:
+            print(name, "弹幕获取失败了,记得重试~(@^_^@)~")
+            continue
+        comments = write_one_video_subtitles(file_path, comments, args)
+        subtitles.update({file_path:comments})
+        print(name, "下载完成!")
+    return subtitles
\ No newline at end of file
diff --git a/sites/youku.py b/sites/youku.py
index e1f18b6..86d34cf 100644
--- a/sites/youku.py
+++ b/sites/youku.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-05 14:52:21
-# 上次编辑时间 : 2020-01-11 17:53:14
+# 上次编辑时间 : 2020-01-16 19:59:08
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 import re
@@ -119,7 +119,7 @@ def main(args):
     subtitles = {}
     for name, duration, video_id in vinfos:
         print(name, "开始下载...")
-        flag, file_path = check_file(name, skip=args.y)
+        flag, file_path = check_file(name, args=args)
         if flag is False:
             print("跳过{}".format(name))
             continue
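
The -series handling added to sites/qq.py reduces to two lookups in pfunc/request_info.py: resolve the episode's cover id with get_cid_by_vid, then expand it to every vid under that cover with get_all_vids_by_cid. A minimal standalone sketch of that flow, not part of the patch itself; the vid value is a placeholder, not a real id:

    from pfunc.request_info import get_cid_by_vid
    from pfunc.request_info import get_all_vids_by_cid

    vid = "abcdefghijk"                 # placeholder 11-character vid, illustration only
    cid = get_cid_by_vid(vid)           # cover id via the union.video.qq.com api, or None on failure
    vids = get_all_vids_by_cid(cid) if cid else [vid]
    print(vids)                         # every episode vid under the cover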
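
For the new sites/sohu.py module, the per-video pipeline used by main() is: resolve [name, duration, vid, aid] with get_vinfo_by_vid, then pull the full danmaku list with get_danmu_all_by_vid. A minimal sketch that calls those two helpers directly, assuming a placeholder vid (not a real id):

    from sites.sohu import get_vinfo_by_vid, get_danmu_all_by_vid

    def fetch_sohu_danmu(vid: str):
        vinfo = get_vinfo_by_vid(vid)            # [name, duration, vid, aid] or None
        if vinfo is None:
            return None
        name, duration, vid, aid = vinfo
        # list of [text, color, time] sorted by time, or None on failure
        return name, get_danmu_all_by_vid(vid, aid, duration)

    print(fetch_sohu_danmu("1234567"))           # "1234567" is a placeholder vid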
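
A side note on the vid de-duplication loop added to sites/qq.py main(): on Python 3.7 (the interpreter named in the repo's shebangs) an order-preserving equivalent is a one-liner. Shown only as an observation on the design choice, not as a change to the patch:

    vids = list(dict.fromkeys(vids))    # drops duplicates while keeping first-seen order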