增加搜狐视频弹幕下载并改进输入提示

2026-06-21 09:30:23 +08:00 · 2020-01-16 20:12:07 +08:00
parent 3cfccc1c3c
commit 986ec2b9fe
10 changed files with 302 additions and 30 deletions
--- a/GetDanMu.py
+++ b/GetDanMu.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:14:39
-# 上次编辑时间       : 2020-01-11 18:40:49
+# 上次编辑时间       : 2020-01-16 19:24:10
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''

@@ -14,6 +14,7 @@ from argparse import ArgumentParser
 from sites.qq import main as qq
 from sites.iqiyi import main as iqiyi
 from sites.youku import main as youku
+from sites.sohu import main as sohu
 from pfunc.cfunc import check_url_site

 # -------------------------------------------
@@ -37,19 +38,36 @@ def main():
    parser.add_argument("-tvid", "--tvid", default="", help="下载tvid对应视频的弹幕，支持同时多个tvid，需要用逗号隔开")
    parser.add_argument("-series", "--series", action="store_true", help="尝试通过单集得到合集的全部弹幕")
    parser.add_argument("-u", "--url", default="", help="下载视频链接所指向视频的弹幕")
-    parser.add_argument("-y", "--y", action="store_false", help="默认覆盖原有弹幕而不提示")
+    parser.add_argument("-y", "--y", action="store_true", help="默认覆盖原有弹幕而不提示")
    args = parser.parse_args()
    # print(args.__dict__)
+    # When argv holds only the program path, no options were supplied: this is
+    # the case for a double-clicked exe (full path passed) and for a bare
+    # command-line launch (relative path when run from the exe's directory,
+    # full path otherwise). Fall back to prompting for a link in that case.
+    init_args = sys.argv
+    imode = "command_line"
+    if len(init_args) == 1:
+        # Must be an assignment; a comparison (`==`) here left imode untouched
+        # and made the interactive branch below unreachable.
+        imode = "non_command_line"
+    if imode == "non_command_line":
+        content = input("请输入链接：\n")
+        check_tip = check_url_site(content)
+        if check_tip is None:
+            sys.exit("不支持的网站")
+        args.url = content
+        args.site = check_tip
+    # 要么有url 要么有site和相关参数的组合
    if args.url != "":
        args.site = check_url_site(args.url)
-    if args.site == "":
-        args.site = input("请输入站点（qq/iqiyi/youku）：\n")
+    elif args.site == "":
+        sys.exit("请传入链接或指定网站+视频相关的参数")
    if args.site == "qq":
        subtitles = qq(args)
    if args.site == "iqiyi":
        subtitles = iqiyi(args)
    if args.site == "youku":
        subtitles = youku(args)
+    if args.site == "sohu":
+        subtitles = sohu(args)

 if __name__ == "__main__":
    # 打包 --> pyinstaller GetDanMu.spec
--- a/README.md
+++ b/README.md
@@ -1,12 +1,7 @@
-<!--
- * @作者: weimo
- * @创建日期: 2020-01-04 18:45:58
- * @上次编辑时间       : 2020-01-11 18:43:20
- * @一个人的命运啊,当然要靠自我奋斗,但是...
- -->
 # GetDanMu

-转换/下载各类视频弹幕的工具
+[转换/下载各类视频弹幕的工具][1]
+
 项目主页：https://github.com/xhlove/GetDanMu

 ## 网站支持
@@ -15,6 +10,7 @@
 | **腾讯视频** | <https://v.qq.com/>    |✓|✓| |
 | **爱奇艺** | <https://www.iqiyi.com/>    |✓|✓|✓|
 | **优酷** | <https://v.youku.com/>    |✓|✓|✓|
+| **搜狐视频** | <https://tv.sohu.com/>    |✓|✓||

 ## 可能存在的问题
 - 下载进度接近100%时暂时没有反应
@@ -26,6 +22,11 @@

 # 更新日志

+## 2020/1/16
+- 增加搜狐视频的支持（剧集）
+- 改进输入提示（双击运行时）
+- 腾讯支持-series设定
+
 ## 2020/1/11
 - 增加优酷弹幕下载，支持合集，支持通过单集直接下载合集弹幕（暂时仅限优酷）
 - 改进去重方式
@@ -35,4 +36,6 @@
 ## 2020/1/5

 - 增加了通过链接下载爱奇艺视频弹幕的方法，支持综艺合集。
- 增加通过链接判断网站
+- 增加通过链接判断网站
+
+  [1]: https://blog.weimo.info/archives/431/
--- a/basic/vars.py
+++ b/basic/vars.py
@@ -3,9 +3,12 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:14:35
-# 上次编辑时间: 2020-01-05 14:46:15
+# 上次编辑时间       : 2020-01-16 19:10:06
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
+
+ALLOW_SITES = ["qq", "iqiyi", "youku", "sohu"]
+
 qqlive = {
    "User-Agent":"qqlive"
 }
--- a/pfunc/cfunc.py
+++ b/pfunc/cfunc.py
@@ -3,13 +3,15 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-05 12:45:18
-# 上次编辑时间       : 2020-01-11 17:37:22
+# 上次编辑时间       : 2020-01-16 14:50:34
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''

 import hashlib
 from urllib.parse import urlparse

+from basic.vars import ALLOW_SITES
+
 def remove_same_danmu(comments: list):
    # 在原有基础上pop会引起索引变化 所以还是采用下面这个方式
    contents = []
@@ -23,7 +25,11 @@ def remove_same_danmu(comments: list):
    return contents

 def check_url_site(url):
-    return urlparse(url).netloc.split(".")[-2]
+    """Return the site key for a URL, or None if the site is not supported.
+
+    The key is the second-to-last dot-separated label of the URL's netloc
+    (e.g. "sohu" for "tv.sohu.com") and must be listed in ALLOW_SITES.
+    Input without a dotted host (empty string, plain text, a link missing
+    its scheme) yields None instead of raising IndexError.
+    """
+    labels = urlparse(url).netloc.split(".")
+    if len(labels) < 2:
+        return None
+    site = labels[-2]
+    if site in ALLOW_SITES:
+        return site
+    return None

 def check_url_locale(url):
    flag = {
--- a/pfunc/dump_to_ass.py
+++ b/pfunc/dump_to_ass.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:17:44
-# 上次编辑时间       : 2020-01-11 17:25:09
+# 上次编辑时间       : 2020-01-16 20:06:23
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 import os
@@ -31,12 +31,16 @@ def write_lines_to_file(ass_head, lines, file_path):
        for line in lines:
            f.write(line + "\n")

-def check_file(name, skip=False, fpath=os.getcwd()):
+def check_file(name, args, fpath=os.getcwd()):
    flag = True
    file_path = os.path.join(fpath, name + ".ass")
    if os.path.isfile(file_path):
-        if skip:
+        if args.y:
            os.remove(file_path)
+        elif args.series:
+            # 存在重复的 那么直接pass（认为已经下载好了）
+            flag = False
+            return flag, file_path
        else:
            isremove = input("{}已存在，是否覆盖？(y/n)：".format(file_path))
            if isremove.strip() == "y":
--- a/pfunc/request_info.py
+++ b/pfunc/request_info.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:14:43
-# 上次编辑时间       : 2020-01-11 17:42:30
+# 上次编辑时间       : 2020-01-16 19:44:55
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 import re
@@ -40,6 +40,28 @@ def get_all_vids_by_column_id():
    # 综艺类型的
    pass

+def get_cid_by_vid(vid):
+    """Return the cover id (cid) reported by the qq data API for a vid.
+
+    Args:
+        vid: Video id sent as ``idlist``.
+
+    Returns:
+        The entry's ``sync_cover`` when set, otherwise the first item of
+        ``cover_list``; None when the response has no readable ``fields``
+        entry or neither key holds a value.
+    """
+    api_url = "http://union.video.qq.com/fcgi-bin/data"
+    params = {
+        "tid": "98",
+        "appid": "10001005",
+        "appkey": "0d1a9ddd94de871b",
+        "idlist": vid,
+        "otype":"json"
+    }
+    r = requests.get(api_url, params=params, headers=qqlive).content.decode("utf-8")
+    # The payload arrives wrapped as `QZOutputJson=<json>;`. Remove the wrapper
+    # as an exact prefix: str.lstrip() treats its argument as a set of
+    # characters and would keep stripping any of them from the payload.
+    prefix = "QZOutputJson="
+    if r.startswith(prefix):
+        r = r[len(prefix):]
+    data = json.loads(r.rstrip(";"))
+    try:
+        fields = data["results"][0]["fields"]
+    except (KeyError, IndexError, TypeError) as e:
+        print("load fields error info -->", e)
+        return None
+    if fields.get("sync_cover"):
+        return fields["sync_cover"]
+    if fields.get("cover_list"):
+        return fields["cover_list"][0]
+    return None
+
 def get_all_vids_by_cid(cid):
    api_url = "http://union.video.qq.com/fcgi-bin/data"
    params = {
--- a/sites/iqiyi.py
+++ b/sites/iqiyi.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:14:41
-# 上次编辑时间       : 2020-01-11 17:23:32
+# 上次编辑时间       : 2020-01-16 19:58:51
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''

@@ -83,7 +83,7 @@ def main(args):
    subtitles = {}
    for name, duration, tvid in vinfos:
        print(name, "开始下载...")
-        flag, file_path = check_file(name, skip=args.y)
+        flag, file_path = check_file(name, args)
        if flag is False:
            print("跳过{}".format(name))
            continue
--- a/sites/qq.py
+++ b/sites/qq.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:14:37
-# 上次编辑时间       : 2020-01-11 17:25:34
+# 上次编辑时间       : 2020-01-16 20:04:51
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''

@@ -14,6 +14,7 @@ import requests

 from basic.vars import qqlive
 from pfunc.dump_to_ass import check_file, write_one_video_subtitles
+from pfunc.request_info import get_cid_by_vid
 from pfunc.request_info import get_all_vids_by_cid as get_vids
 from pfunc.request_info import get_danmu_target_id_by_vid as get_target_id

@@ -97,10 +98,10 @@ def get_danmu_by_target_id(vid: str, duration: int, target_id, font="微软雅
    return comments

    
-def get_one_subtitle_by_vinfo(vinfo, font="微软雅黑", font_size=25, skip=False):
+def get_one_subtitle_by_vinfo(vinfo, font="微软雅黑", font_size=25, args=""):
    vid, name, duration, target_id = vinfo
    print(name, "开始下载...")
-    flag, file_path = check_file(name, skip=skip)
+    flag, file_path = check_file(name, args)
    if flag is False:
        print("跳过{}".format(name))
        return
@@ -108,7 +109,7 @@ def get_one_subtitle_by_vinfo(vinfo, font="微软雅黑", font_size=25, skip=Fal
    # print("{}弹幕下载完成！".format(name))
    return comments, file_path

-def ask_input(url=""):
+def ask_input(url="", isall=False):
    if url == "":
        url = input("请输入vid/coverid/链接，输入q退出：\n").strip()
    if url == "q" or url == "":
@@ -117,6 +118,9 @@ def ask_input(url=""):
    params = url.replace(".html", "").split("/")
    if params[-1].__len__() == 11:
        vids = [params[-1]]
+        if isall:
+            cid = get_cid_by_vid(params[-1])
+            vids += get_vids(cid)
    elif params[-1].__len__() == 15:
        cid = params[-1]
        vids = get_vids(cid)
@@ -132,6 +136,9 @@ def ask_input(url=""):

 def main(args):
    vids = []
+    isall = False
+    if args.series:
+        isall = True
    if args.cid and args.cid.__len__() == 15:
        vids += get_vids(args.cid)
    if args.vid:
@@ -141,16 +148,26 @@ def main(args):
            vids += [vid for vid in args.vid.strip().replace(" ", "").split(",") if vid.__len__() == 11]
        else:
            pass
+        if args.series:
+            cid = get_cid_by_vid(args.vid)
+            vids += get_vids(cid)
    if args.url:
-        vids += ask_input(url=args.url)
+        vids += ask_input(url=args.url, isall=isall)
    if args.vid == "" and args.cid == "" and args.url == "":
-        vids += ask_input()
+        vids += ask_input(isall=isall)
    if vids.__len__() <= 0:
        sys.exit("没有任何有效输入")
+    vids_bak = vids
+    vids = []
+    for vid in vids_bak:
+        if vid in vids:
+            continue
+        else:
+            vids.append(vid)
    vinfos = get_video_info_by_vid(vids)
    subtitles = {}
    for vinfo in vinfos:
-        infos = get_one_subtitle_by_vinfo(vinfo, args.font, args.font_size, args.y)
+        infos = get_one_subtitle_by_vinfo(vinfo, args.font, args.font_size, args=args)
        if infos is None:
            continue
        comments, file_path = infos
--- a/sites/sohu.py
+++ b/sites/sohu.py
@@ -0,0 +1,199 @@
+#!/usr/bin/env python3.7
+# coding=utf-8
+'''
+# 作者: weimo
+# 创建日期: 2020-01-16 17:45:35
+# 上次编辑时间       : 2020-01-16 20:09:22
+# 一个人的命运啊,当然要靠自我奋斗,但是...
+'''
+import json
+import requests
+
+from basic.vars import chrome
+from pfunc.request_info import matchit
+from pfunc.dump_to_ass import check_file, write_one_video_subtitles
+
+def try_decode(content):
+    """Decode response bytes as text, trying UTF-8 first and then GBK.
+
+    UTF-8 goes first because it is strict: GBK-encoded Chinese text is
+    almost never valid UTF-8, whereas the GBK codec accepts most UTF-8 byte
+    sequences and silently produces mojibake.
+
+    Args:
+        content: Raw bytes to decode.
+
+    Returns:
+        The decoded string, or None if neither codec can decode the input.
+    """
+    for method in ("utf-8", "gbk"):
+        try:
+            return content.decode(method)
+        except UnicodeDecodeError:
+            print("try {} decode method failed.".format(method))
+    return None
+
+def get_vinfos_by_url(url: str):
+    """Resolve a sohu link into a list of video info entries.
+
+    The link is matched against three shapes: an episode page
+    (``.../v/<x>.html`` or ``.../<a>/<b>.html``), an album page
+    (``.../album/?<digits>.shtml``) and a vid page (``.../v<digits>.shtml``).
+    Episode pages are downloaded and the ``var vid`` value is read from the
+    page; if none of the shapes match, the page is downloaded and
+    ``var playlistId`` is looked for instead.
+
+    NOTE(review): matchit comes from pfunc.request_info; this code assumes
+    it returns the captured text on a match and None otherwise - confirm.
+
+    Returns:
+        A list of entries as produced by get_vinfo_by_vid / get_vinfos,
+        or None when nothing could be resolved.
+    """
+    # Raw strings keep \s, \d and \. as regex escapes instead of relying on
+    # Python passing unknown string escapes through (deprecated behaviour).
+    ep_url = matchit([r"[\s\S]+?tv.sohu.com/v/(.+?)\.html", r"[\s\S]+?tv.sohu.com/(.+?)/(.+?)\.html"], url)
+    aid_url = matchit([r"[\s\S]+?tv.sohu.com/album/.(\d+)\.shtml"], url)
+    vid_url = matchit([r"[\s\S]+?tv.sohu.com/v(\d+)\.shtml"], url)
+    if ep_url:
+        try:
+            r = requests.get(url, headers=chrome, timeout=3).content
+        except Exception as e:
+            print(e)
+            print("get sohu (url -> {}) ep url failed.".format(url))
+            return None
+        r_decode = try_decode(r)
+        if r_decode is None:
+            print("ep response use decode failed(url -> {}).".format(url))
+            return None
+        vid = matchit([r"[\s\S]+?var vid.+?(\d+)"], r_decode)
+        if not vid:
+            print("match sohu vid (url -> {}) failed.".format(url))
+            return None
+        vinfo = get_vinfo_by_vid(vid)
+        if vinfo is None:
+            return None
+        return [vinfo]
+    if aid_url:
+        return get_vinfos(aid_url)
+    if vid_url:
+        vinfo = get_vinfo_by_vid(vid_url)
+        if vinfo is None:
+            return None
+        return [vinfo]
+    if ep_url is None and aid_url is None and vid_url is None:
+        # Possibly an album (collection) page that matched none of the shapes.
+        try:
+            r = requests.get(url, headers=chrome, timeout=3).content
+        except Exception:
+            print("get sohu (url -> {}) album url failed.".format(url))
+            return None
+        r_decode = try_decode(r)
+        if r_decode is None:
+            print("album response decode failed(url -> {}).".format(url))
+            return None
+        aid = matchit([r"[\s\S]+?var playlistId.+?(\d+)"], r_decode)
+        if aid:
+            return get_vinfos(aid)
+    return None
+
+
+def get_vinfos(aid: str):
+    """Fetch the video list of a sohu album (playlist).
+
+    Args:
+        aid: Playlist id sent as ``playlistid``.
+
+    Returns:
+        A list of ``[name, duration, vid, aid]`` entries, where duration is
+        ``playLength`` converted to int, or None when the request fails or
+        the response carries no ``videos``.
+    """
+    api_url = "https://pl.hd.sohu.com/videolist"
+    params = {
+        "callback": "",
+        "playlistid": aid,
+        "o_playlistId": "",
+        "pianhua": "0",
+        "pagenum": "1",
+        "pagesize": "999",
+        "order": "0", # 0 = ascending order
+        "cnt": "1",
+        "pageRule": "2",
+        "withPgcVideo": "0",
+        "ssl": "0",
+        "preVideoRule": "3",
+        "_": "" # 1579167883430
+    }
+    try:
+        r = requests.get(api_url, params=params, headers=chrome, timeout=3).content.decode("gbk")
+    except Exception:
+        # Report the album id: there is no `vid` in this function, and
+        # formatting it here raised NameError inside the handler, hiding the
+        # original request/decode failure.
+        print("get sohu (aid -> {}) videolist failed.".format(aid))
+        return None
+    data = json.loads(r)
+    videos = data.get("videos")
+    if not videos:
+        print("videolist has no videos (aid -> {}).".format(aid))
+        return None
+    return [[video["name"], int(float(video["playLength"])), video["vid"], aid] for video in videos]
+
+
+def get_vinfo_by_vid(vid: str):
+    """Fetch name, duration and album id for a single sohu vid.
+
+    Returns ``[tvName, totalDuration as int, vid, aid]`` where aid is the
+    response's ``pid`` as a string ("" when absent), or None when the
+    request fails, the status is not 1, or the response has no ``data``.
+    """
+    endpoint = "https://hot.vrs.sohu.com/vrs_flash.action"
+    query = {
+        "vid": vid,
+        "ver": "31",
+        "ssl": "1",
+        "pflag": "pch5"
+    }
+    try:
+        raw = requests.get(endpoint, params=query, headers=chrome, timeout=3).content.decode("utf-8")
+    except Exception:
+        print("get sohu (vid -> {}) vinfo failed.".format(vid))
+        return None
+    resp = json.loads(raw)
+    # Guard clauses: bail out on an error status or a missing payload.
+    if resp.get("status") != 1:
+        print("vid -> {} vinfo request return error.".format(vid))
+        return
+    detail = resp.get("data")
+    if not detail:
+        print("vid -> {} vinfo request return no data.".format(vid))
+        return
+    pid = resp.get("pid")
+    aid = str(pid) if pid else ""
+    return [detail["tvName"], int(float(detail["totalDuration"])), vid, aid]
+
+def get_danmu_all_by_vid(vid: str, aid: str, duration: int):
+    """Download the danmu list of one sohu video.
+
+    Requests the dmListAll endpoint for the range 0..duration and returns
+    a list of ``[c, "ffffff", v]`` rows built from each comment's ``c`` and
+    ``v`` fields, sorted by ``v``. Returns None when the request or
+    decoding fails.
+    """
+    endpoint = "https://api.danmu.tv.sohu.com/dmh5/dmListAll"
+    query = {
+        "act": "dmlist_v2",
+        "dct": "1",
+        "request_from": "h5_js",
+        "vid": vid,
+        "page": "1",
+        "pct": "2",
+        "from": "PlayerType.SOHU_VRS",
+        "o": "4",
+        "aid": aid,
+        "time_begin": "0",
+        "time_end": str(duration)
+    }
+    try:
+        raw = requests.get(endpoint, params=query, headers=chrome, timeout=3).content.decode("utf-8")
+    except Exception:
+        print("get sohu (vid -> {}) danmu failed.".format(vid))
+        return None
+    entries = json.loads(raw)["info"]["comments"]
+    rows = [[entry["c"], "ffffff", entry["v"]] for entry in entries]
+    # Order rows by their last element (the comment's "v" value).
+    rows.sort(key=lambda row: row[-1])
+    return rows
+
+def main(args):
+    """Download danmu for every sohu video selected by args.
+
+    Videos are collected from args.vid, args.aid and args.url (prompting
+    for a link when all three are empty). Each one is passed through
+    check_file, downloaded and written out. Returns a dict mapping each
+    written file path to the value returned by write_one_video_subtitles.
+    """
+    vinfos = []
+    if args.vid:
+        single = get_vinfo_by_vid(args.vid)
+        if single:
+            vinfos.append(single)
+    if args.aid:
+        from_album = get_vinfos(args.aid)
+        if from_album:
+            vinfos.extend(from_album)
+    nothing_given = args.vid == "" and args.aid == "" and args.url == ""
+    if nothing_given:
+        args.url = input("请输入sohu链接：\n")
+    if args.url:
+        from_url = get_vinfos_by_url(args.url)
+        if from_url:
+            vinfos.extend(from_url)
+    subtitles = {}
+    for name, duration, vid, aid in vinfos:
+        print(name, "开始下载...")
+        flag, file_path = check_file(name, args)
+        if flag is False:
+            print("跳过{}".format(name))
+            continue
+        comments = get_danmu_all_by_vid(vid, aid, duration)
+        if comments is None:
+            print(name, "弹幕获取失败了，记得重试~(@^_^@)~")
+            continue
+        subtitles[file_path] = write_one_video_subtitles(file_path, comments, args)
+        print(name, "下载完成！")
+    return subtitles
--- a/sites/youku.py
+++ b/sites/youku.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-05 14:52:21
-# 上次编辑时间       : 2020-01-11 17:53:14
+# 上次编辑时间       : 2020-01-16 19:59:08
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 import re
@@ -119,7 +119,7 @@ def main(args):
    subtitles = {}
    for name, duration, video_id in vinfos:
        print(name, "开始下载...")
-        flag, file_path = check_file(name, skip=args.y)
+        flag, file_path = check_file(name, args=args)
        if flag is False:
            print("跳过{}".format(name))
            continue