增加搜狐视频弹幕下载并改进输入提示

2026-06-22 18:10:25 +08:00 · 2020-01-16 20:12:07 +08:00
parent 3cfccc1c3c
commit 986ec2b9fe
10 changed files with 302 additions and 30 deletions
--- a/GetDanMu.py
+++ b/GetDanMu.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:14:39
-# 上次编辑时间       : 2020-01-11 18:40:49
+# 上次编辑时间       : 2020-01-16 19:24:10
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
@@ -14,6 +14,7 @@ from argparse import ArgumentParser
 from sites.qq import main as qq
 from sites.iqiyi import main as iqiyi
 from sites.youku import main as youku
 from sites.sohu import main as sohu
 from pfunc.cfunc import check_url_site
 # -------------------------------------------
@@ -37,19 +38,36 @@ def main():
    parser.add_argument("-tvid", "--tvid", default="", help="下载tvid对应视频的弹幕，支持同时多个tvid，需要用逗号隔开")
    parser.add_argument("-series", "--series", action="store_true", help="尝试通过单集得到合集的全部弹幕")
    parser.add_argument("-u", "--url", default="", help="下载视频链接所指向视频的弹幕")
-    parser.add_argument("-y", "--y", action="store_false", help="默认覆盖原有弹幕而不提示")
+    parser.add_argument("-y", "--y", action="store_true", help="默认覆盖原有弹幕而不提示")
    args = parser.parse_args()
    # print(args.__dict__)
    init_args = sys.argv
    imode = "command_line"
    if len(init_args) == 1:
        # argv holds only the program path when the exe is double-clicked or
        # started from a shell without arguments (a relative path when run from
        # the exe's own directory, the full path otherwise or on double-click).
        # Switch to interactive mode so the link prompt below is actually shown;
        # this must be an assignment, a comparison here leaves imode unchanged.
        imode = "non_command_line"
    if imode == "non_command_line":
        content = input("请输入链接：\n")
        check_tip = check_url_site(content)
        if check_tip is None:
            sys.exit("不支持的网站")
        args.url = content
        args.site = check_tip
    # 要么有url 要么有site和相关参数的组合
    if args.url != "":
        args.site = check_url_site(args.url)
-    if args.site == "":
+    elif args.site == "":
-        args.site = input("请输入站点（qq/iqiyi/youku）：\n")
+        sys.exit("请传入链接或指定网站+视频相关的参数")
    if args.site == "qq":
        subtitles = qq(args)
    if args.site == "iqiyi":
        subtitles = iqiyi(args)
    if args.site == "youku":
        subtitles = youku(args)
    if args.site == "sohu":
        subtitles = sohu(args)
 if __name__ == "__main__":
    # 打包 --> pyinstaller GetDanMu.spec
--- a/README.md
+++ b/README.md
@@ -1,12 +1,7 @@
 <!--
 * @作者: weimo
 * @创建日期: 2020-01-04 18:45:58
 * @上次编辑时间       : 2020-01-11 18:43:20
 * @一个人的命运啊,当然要靠自我奋斗,但是...
 -->
 # GetDanMu
-转换/下载各类视频弹幕的工具
+[转换/下载各类视频弹幕的工具][1]
 项目主页：https://github.com/xhlove/GetDanMu
 ## 网站支持
@@ -15,6 +10,7 @@
 | **腾讯视频** | <https://v.qq.com/>    |✓|✓| |
 | **爱奇艺** | <https://www.iqiyi.com/>    |✓|✓|✓|
 | **优酷** | <https://v.youku.com/>    |✓|✓|✓|
 | **搜狐视频** | <https://tv.sohu.com/>    |✓|✓||
 ## 可能存在的问题
 - 下载进度接近100%时暂时没有反应
@@ -26,6 +22,11 @@
 # 更新日志
 ## 2020/1/16
 - 增加搜狐视频的支持（剧集）
 - 改进输入提示（双击运行时）
 - 腾讯支持-series设定
 ## 2020/1/11
 - 增加优酷弹幕下载，支持合集，支持通过单集直接下载合集弹幕（暂时仅限优酷）
 - 改进去重方式
@@ -36,3 +37,5 @@
 - 增加了通过链接下载爱奇艺视频弹幕的方法，支持综艺合集。
 - 增加通过链接判断网站
  [1]: https://blog.weimo.info/archives/431/
--- a/basic/vars.py
+++ b/basic/vars.py
@@ -3,9 +3,12 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:14:35
-# 上次编辑时间: 2020-01-05 14:46:15
+# 上次编辑时间       : 2020-01-16 19:10:06
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 # Names of the video sites this tool supports.
 ALLOW_SITES = ["qq", "iqiyi", "youku", "sohu"]
 # Request headers that send "qqlive" as the User-Agent.
 qqlive = {
    "User-Agent":"qqlive"
 }
--- a/pfunc/cfunc.py
+++ b/pfunc/cfunc.py
@@ -3,13 +3,15 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-05 12:45:18
-# 上次编辑时间       : 2020-01-11 17:37:22
+# 上次编辑时间       : 2020-01-16 14:50:34
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 import hashlib
 from urllib.parse import urlparse
 from basic.vars import ALLOW_SITES
 def remove_same_danmu(comments: list):
    # 在原有基础上pop会引起索引变化 所以还是采用下面这个方式
    contents = []
@@ -23,7 +25,11 @@ def remove_same_danmu(comments: list):
    return contents
 def check_url_site(url):
    """Return the supported site name a URL points at, or None.

    The site name is the second-to-last dot-separated label of the URL's
    network location (e.g. "sohu" for "https://tv.sohu.com/...").

    Args:
        url: link typed or passed in by the user.

    Returns:
        The site name when it is listed in ALLOW_SITES; None when the site is
        not supported or no such label can be extracted from the URL.
    """
    labels = urlparse(url).netloc.split(".")
    # Input without a scheme, a bare word or an empty string yields a netloc
    # with fewer than two labels; report "unsupported" instead of raising.
    if len(labels) < 2:
        return None
    site = labels[-2]
    if site in ALLOW_SITES:
        return site
    return None
 def check_url_locale(url):
    flag = {
--- a/pfunc/dump_to_ass.py
+++ b/pfunc/dump_to_ass.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:17:44
-# 上次编辑时间       : 2020-01-11 17:25:09
+# 上次编辑时间       : 2020-01-16 20:06:23
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 import os
@@ -31,12 +31,16 @@ def write_lines_to_file(ass_head, lines, file_path):
        for line in lines:
            f.write(line + "\n")
-def check_file(name, skip=False, fpath=os.getcwd()):
+def check_file(name, args, fpath=os.getcwd()):
    flag = True
    file_path = os.path.join(fpath, name + ".ass")
    if os.path.isfile(file_path):
-        if skip:
+        if args.y:
            os.remove(file_path)
        elif args.series:
            # 存在重复的 那么直接pass（认为已经下载好了）
            flag = False
            return flag, file_path
        else:
            isremove = input("{}已存在，是否覆盖？(y/n)：".format(file_path))
            if isremove.strip() == "y":
--- a/pfunc/request_info.py
+++ b/pfunc/request_info.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:14:43
-# 上次编辑时间       : 2020-01-11 17:42:30
+# 上次编辑时间       : 2020-01-16 19:44:55
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 import re
@@ -40,6 +40,28 @@ def get_all_vids_by_column_id():
    # 综艺类型的
    pass
 def get_cid_by_vid(vid):
    """Look up the cover id (cid) that a Tencent Video vid belongs to.

    Requests the union.video.qq.com data API for the given vid and reads the
    "fields" object of the first result.

    Args:
        vid: video id, sent as the "idlist" query parameter.

    Returns:
        The "sync_cover" value when it is present and non-empty, otherwise the
        first entry of "cover_list"; None when the fields cannot be read or
        neither key holds a value.
    """
    api_url = "http://union.video.qq.com/fcgi-bin/data"
    params = {
        "tid": "98",
        "appid": "10001005",
        "appkey": "0d1a9ddd94de871b",
        "idlist": vid,
        "otype":"json"
    }
    r = requests.get(api_url, params=params, headers=qqlive).content.decode("utf-8")
    # The body is expected to look like `QZOutputJson=<json>;`. Remove the
    # wrapper as an exact prefix: str.lstrip() takes a set of characters, not
    # a prefix, and could eat into the payload.
    prefix = "QZOutputJson="
    if r.startswith(prefix):
        r = r[len(prefix):]
    data = json.loads(r.rstrip(";"))
    try:
        fields = data["results"][0]["fields"]
    except (KeyError, IndexError, TypeError) as e:
        print("load fields error info -->", e)
        return None
    if fields.get("sync_cover"):
        return fields["sync_cover"]
    if fields.get("cover_list"):
        return fields["cover_list"][0]
    return None
 def get_all_vids_by_cid(cid):
    api_url = "http://union.video.qq.com/fcgi-bin/data"
    params = {
--- a/sites/iqiyi.py
+++ b/sites/iqiyi.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:14:41
-# 上次编辑时间       : 2020-01-11 17:23:32
+# 上次编辑时间       : 2020-01-16 19:58:51
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
@@ -83,7 +83,7 @@ def main(args):
    subtitles = {}
    for name, duration, tvid in vinfos:
        print(name, "开始下载...")
-        flag, file_path = check_file(name, skip=args.y)
+        flag, file_path = check_file(name, args)
        if flag is False:
            print("跳过{}".format(name))
            continue
--- a/sites/qq.py
+++ b/sites/qq.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:14:37
-# 上次编辑时间       : 2020-01-11 17:25:34
+# 上次编辑时间       : 2020-01-16 20:04:51
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
@@ -14,6 +14,7 @@ import requests
 from basic.vars import qqlive
 from pfunc.dump_to_ass import check_file, write_one_video_subtitles
 from pfunc.request_info import get_cid_by_vid
 from pfunc.request_info import get_all_vids_by_cid as get_vids
 from pfunc.request_info import get_danmu_target_id_by_vid as get_target_id
@@ -97,10 +98,10 @@ def get_danmu_by_target_id(vid: str, duration: int, target_id, font="微软雅
    return comments
-def get_one_subtitle_by_vinfo(vinfo, font="微软雅黑", font_size=25, skip=False):
+def get_one_subtitle_by_vinfo(vinfo, font="微软雅黑", font_size=25, args=""):
    vid, name, duration, target_id = vinfo
    print(name, "开始下载...")
-    flag, file_path = check_file(name, skip=skip)
+    flag, file_path = check_file(name, args)
    if flag is False:
        print("跳过{}".format(name))
        return
@@ -108,7 +109,7 @@ def get_one_subtitle_by_vinfo(vinfo, font="微软雅黑", font_size=25, skip=Fal
    # print("{}弹幕下载完成！".format(name))
    return comments, file_path
-def ask_input(url=""):
+def ask_input(url="", isall=False):
    if url == "":
        url = input("请输入vid/coverid/链接，输入q退出：\n").strip()
    if url == "q" or url == "":
@@ -117,6 +118,9 @@ def ask_input(url=""):
    params = url.replace(".html", "").split("/")
    if params[-1].__len__() == 11:
        vids = [params[-1]]
        if isall:
            cid = get_cid_by_vid(params[-1])
            vids += get_vids(cid)
    elif params[-1].__len__() == 15:
        cid = params[-1]
        vids = get_vids(cid)
@@ -132,6 +136,9 @@ def ask_input(url=""):
 def main(args):
    vids = []
    isall = False
    if args.series:
        isall = True
    if args.cid and args.cid.__len__() == 15:
        vids += get_vids(args.cid)
    if args.vid:
@@ -141,16 +148,26 @@ def main(args):
            vids += [vid for vid in args.vid.strip().replace(" ", "").split(",") if vid.__len__() == 11]
        else:
            pass
        if args.series:
            cid = get_cid_by_vid(args.vid)
            vids += get_vids(cid)
    if args.url:
-        vids += ask_input(url=args.url)
+        vids += ask_input(url=args.url, isall=isall)
    if args.vid == "" and args.cid == "" and args.url == "":
-        vids += ask_input()
+        vids += ask_input(isall=isall)
    if vids.__len__() <= 0:
        sys.exit("没有任何有效输入")
    vids_bak = vids
    vids = []
    for vid in vids_bak:
        if vid in vids:
            continue
        else:
            vids.append(vid)
    vinfos = get_video_info_by_vid(vids)
    subtitles = {}
    for vinfo in vinfos:
-        infos = get_one_subtitle_by_vinfo(vinfo, args.font, args.font_size, args.y)
+        infos = get_one_subtitle_by_vinfo(vinfo, args.font, args.font_size, args=args)
        if infos is None:
            continue
        comments, file_path = infos
--- a/sites/sohu.py
+++ b/sites/sohu.py
@@ -0,0 +1,199 @@
 #!/usr/bin/env python3.7
 # coding=utf-8
 '''
 # 作者: weimo
 # 创建日期: 2020-01-16 17:45:35
 # 上次编辑时间       : 2020-01-16 20:09:22
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 import json
 import requests
 from basic.vars import chrome
 from pfunc.request_info import matchit
 from pfunc.dump_to_ass import check_file, write_one_video_subtitles
 def try_decode(content):
    """Decode raw bytes to text, trying GBK first and UTF-8 second.

    Returns the text produced by the first codec that succeeds. A notice is
    printed for every codec that fails, and None is returned when all fail.
    """
    for codec in ("gbk", "utf-8"):
        try:
            text = content.decode(codec)
        except Exception:
            print("try {} decode method failed.".format(codec))
        else:
            return text
    return None
 def get_vinfos_by_url(url: str):
    """Turn a Sohu page URL into a list of video-info entries.

    The URL is tested against three pattern groups, in this order:
      * episode page (".../v/<x>.html" or ".../<a>/<b>.html"): the page is
        downloaded, the id after "var vid" is extracted and passed to
        get_vinfo_by_vid;
      * album page (".../album/<char><digits>.shtml"): the digits are passed
        to get_vinfos;
      * video page (".../v<digits>.shtml"): the digits are passed to
        get_vinfo_by_vid;
      * none of the above: the page is downloaded and searched for
        "var playlistId", whose id is passed to get_vinfos.

    NOTE(review): matchit is defined elsewhere; it is presumed to return the
    captured text, or None when no pattern matches -- confirm.

    Args:
        url: link to a Sohu video, episode or album page.

    Returns:
        A list of vinfo entries, or None when the page cannot be fetched or
        decoded, or no id can be found.
    """
    # Raw strings keep \s, \d and \. as regex escapes instead of relying on
    # Python passing unknown string escapes through unchanged.
    ep_url = matchit([r"[\s\S]+?tv.sohu.com/v/(.+?)\.html", r"[\s\S]+?tv.sohu.com/(.+?)/(.+?)\.html"], url)
    aid_url = matchit([r"[\s\S]+?tv.sohu.com/album/.(\d+)\.shtml"], url)
    vid_url = matchit([r"[\s\S]+?tv.sohu.com/v(\d+)\.shtml"], url)
    if ep_url:
        try:
            r = requests.get(url, headers=chrome, timeout=3).content
        except Exception as e:
            print(e)
            print("get sohu (url -> {}) ep url failed.".format(url))
            return None
        r_decode = try_decode(r)
        if r_decode is None:
            print("ep response use decode failed(url -> {}).".format(url))
            return None
        vid = matchit([r"[\s\S]+?var vid.+?(\d+)"], r_decode)
        if not vid:
            print("match sohu vid (url -> {}) failed.".format(url))
            return None
        vinfo = get_vinfo_by_vid(vid)
        return None if vinfo is None else [vinfo]
    if aid_url:
        return get_vinfos(aid_url)
    if vid_url:
        vinfo = get_vinfo_by_vid(vid_url)
        return None if vinfo is None else [vinfo]
    if ep_url is None and aid_url is None and vid_url is None:
        # No known URL shape matched; the link may be an album landing page.
        try:
            r = requests.get(url, headers=chrome, timeout=3).content
        except Exception:
            print("get sohu (url -> {}) album url failed.".format(url))
            return None
        r_decode = try_decode(r)
        if r_decode is None:
            print("album response decode failed(url -> {}).".format(url))
            return None
        aid = matchit([r"[\s\S]+?var playlistId.+?(\d+)"], r_decode)
        if aid:
            return get_vinfos(aid)
    return None
 def get_vinfos(aid: str):
    """List the videos of a Sohu album (playlist).

    Requests the pl.hd.sohu.com videolist API for one page of up to 999
    entries and decodes the response as GBK.

    Args:
        aid: album id, sent as the "playlistid" query parameter.

    Returns:
        A list of [name, int(float(playLength)), vid, aid] entries, or None
        when the request fails or the response contains no "videos".
    """
    api_url = "https://pl.hd.sohu.com/videolist"
    params = {
        "callback": "",
        "playlistid": aid,
        "o_playlistId": "",
        "pianhua": "0",
        "pagenum": "1",
        "pagesize": "999",
        "order": "0", # 0 = ascending order
        "cnt": "1",
        "pageRule": "2",
        "withPgcVideo": "0",
        "ssl": "0",
        "preVideoRule": "3",
        "_": "" # e.g. 1579167883430
    }
    try:
        r = requests.get(api_url, params=params, headers=chrome, timeout=3).content.decode("gbk")
    except Exception:
        # Report the album id: this function has no `vid`, and formatting an
        # undefined name here would raise NameError instead of returning None.
        print("get sohu (aid -> {}) videolist failed.".format(aid))
        return None
    data = json.loads(r)
    if data.get("videos"):
        videos = data["videos"]
    else:
        print("videolist has no videos (aid -> {}).".format(aid))
        return None
    vinfos = [[video["name"], int(float(video["playLength"])), video["vid"], aid] for video in videos]
    return vinfos
 def get_vinfo_by_vid(vid: str):
    """Fetch the info entry of a single Sohu video.

    Requests the hot.vrs.sohu.com vrs_flash API for the given vid.

    Returns:
        [tvName, int(float(totalDuration)), vid, aid], where aid is the
        response's "pid" as a string or "" when it is absent; None when the
        request fails, the status is not 1, or the response has no "data".
    """
    query = {
        "vid": vid,
        "ver": "31",
        "ssl": "1",
        "pflag": "pch5"
    }
    try:
        response = requests.get("https://hot.vrs.sohu.com/vrs_flash.action", params=query, headers=chrome, timeout=3)
        text = response.content.decode("utf-8")
    except Exception:
        print("get sohu (vid -> {}) vinfo failed.".format(vid))
        return None
    reply = json.loads(text)
    if reply.get("status") != 1:
        print("vid -> {} vinfo request return error.".format(vid))
        return
    pid = reply.get("pid")
    aid = str(pid) if pid else ""
    detail = reply.get("data")
    if not detail:
        print("vid -> {} vinfo request return no data.".format(vid))
        return
    return [detail["tvName"], int(float(detail["totalDuration"])), vid, aid]
 def get_danmu_all_by_vid(vid: str, aid: str, duration: int):
    """Fetch all danmu comments of one Sohu video in a single request.

    Args:
        vid: video id.
        aid: album id sent as the "aid" parameter (may be an empty string).
        duration: sent as "time_end"; "time_begin" is always "0".

    Returns:
        A list of [comment["c"], "ffffff", comment["v"]] entries sorted by the
        "v" value (presumably the playback time -- confirm against the API),
        or None when the request fails or the response is not shaped as
        expected, so the caller can skip this video and continue.
    """
    api_url = "https://api.danmu.tv.sohu.com/dmh5/dmListAll"
    params = {
        "act": "dmlist_v2",
        "dct": "1",
        "request_from": "h5_js",
        "vid": vid,
        "page": "1",
        "pct": "2",
        "from": "PlayerType.SOHU_VRS",
        "o": "4",
        "aid": aid,
        "time_begin": "0",
        "time_end": str(duration)
    }
    try:
        r = requests.get(api_url, params=params, headers=chrome, timeout=3).content.decode("utf-8")
    except Exception:
        print("get sohu (vid -> {}) danmu failed.".format(vid))
        return None
    try:
        data = json.loads(r)["info"]["comments"]
        comments = [[comment["c"], "ffffff", comment["v"]] for comment in data]
    except (ValueError, KeyError, TypeError):
        # Invalid JSON or a missing "info"/"comments" section is reported the
        # same way as a failed request instead of aborting the whole batch.
        print("parse sohu (vid -> {}) danmu failed.".format(vid))
        return None
    comments.sort(key=lambda item: item[-1])
    return comments
 def main(args):
    """Download danmu for every Sohu video selected by the arguments.

    Videos are collected from args.vid, args.aid and args.url; when all three
    are empty strings the user is prompted for a link. Each video's comments
    are fetched and written through write_one_video_subtitles.

    Returns:
        A dict mapping each written file path to the value returned by
        write_one_video_subtitles for it.
    """
    collected = []
    if args.vid:
        single = get_vinfo_by_vid(args.vid)
        if single:
            collected.append(single)
    if args.aid:
        album = get_vinfos(args.aid)
        if album:
            collected.extend(album)
    nothing_given = args.vid == "" and args.aid == "" and args.url == ""
    if nothing_given:
        args.url = input("请输入sohu链接：\n")
    if args.url:
        from_url = get_vinfos_by_url(args.url)
        if from_url:
            collected.extend(from_url)
    results = {}
    for name, duration, vid, aid in collected:
        print(name, "开始下载...")
        proceed, target_path = check_file(name, args)
        if proceed is False:
            print("跳过{}".format(name))
            continue
        danmu = get_danmu_all_by_vid(vid, aid, duration)
        if danmu is None:
            print(name, "弹幕获取失败了，记得重试~(@^_^@)~")
            continue
        results[target_path] = write_one_video_subtitles(target_path, danmu, args)
        print(name, "下载完成！")
    return results
--- a/sites/youku.py
+++ b/sites/youku.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-05 14:52:21
-# 上次编辑时间       : 2020-01-11 17:53:14
+# 上次编辑时间       : 2020-01-16 19:59:08
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 import re
@@ -119,7 +119,7 @@ def main(args):
    subtitles = {}
    for name, duration, video_id in vinfos:
        print(name, "开始下载...")
-        flag, file_path = check_file(name, skip=args.y)
+        flag, file_path = check_file(name, args=args)
        if flag is False:
            print("跳过{}".format(name))
            continue