From 986ec2b9fe3f39cdaf01b7ff20dcd274d1538a5e Mon Sep 17 00:00:00 2001
From: xhlove
Date: Thu, 16 Jan 2020 20:12:07 +0800
Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=90=9C=E7=8B=90=E8=A7=86?=
 =?UTF-8?q?=E9=A2=91=E5=BC=B9=E5=B9=95=E4=B8=8B=E8=BD=BD=E5=B9=B6=E6=94=B9?=
 =?UTF-8?q?=E8=BF=9B=E8=BE=93=E5=85=A5=E6=8F=90=E7=A4=BA?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 GetDanMu.py           |  26 +++++-
 README.md             |  19 ++--
 basic/vars.py         |   5 +-
 pfunc/cfunc.py        |  10 ++-
 pfunc/dump_to_ass.py  |  10 ++-
 pfunc/request_info.py |  24 ++++-
 sites/iqiyi.py        |   4 +-
 sites/qq.py           |  31 +++++--
 sites/sohu.py         | 199 ++++++++++++++++++++++++++++++++++++++++++
 sites/youku.py        |   4 +-
 10 files changed, 302 insertions(+), 30 deletions(-)
 create mode 100644 sites/sohu.py

diff --git a/GetDanMu.py b/GetDanMu.py
index cffe95a..7b96f2b 100644
--- a/GetDanMu.py
+++ b/GetDanMu.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:14:39
-# 上次编辑时间 : 2020-01-11 18:40:49
+# 上次编辑时间 : 2020-01-16 19:24:10
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 
@@ -14,6 +14,7 @@ from argparse import ArgumentParser
 from sites.qq import main as qq
 from sites.iqiyi import main as iqiyi
 from sites.youku import main as youku
+from sites.sohu import main as sohu
 from pfunc.cfunc import check_url_site
 
 # -------------------------------------------
@@ -37,19 +38,36 @@ def main():
     parser.add_argument("-tvid", "--tvid", default="", help="下载tvid对应视频的弹幕,支持同时多个tvid,需要用逗号隔开")
     parser.add_argument("-series", "--series", action="store_true", help="尝试通过单集得到合集的全部弹幕")
     parser.add_argument("-u", "--url", default="", help="下载视频链接所指向视频的弹幕")
-    parser.add_argument("-y", "--y", action="store_false", help="默认覆盖原有弹幕而不提示")
+    parser.add_argument("-y", "--y", action="store_true", help="覆盖已存在的弹幕文件而不提示")
     args = parser.parse_args()
     # print(args.__dict__)
+    init_args = sys.argv
+    imode = "command_line"
+    if init_args.__len__() == 1:
+        # 双击运行或命令执行exe文件时 传入参数只有exe的路径
+        # 命令行下执行会传入exe的相对路径(在exe所在路径执行时) 传入完整路径(非exe所在路径下执行)
+        # 双击运行exe传入完整路径
+        imode = "non_command_line"
+    if imode == "non_command_line":
+        content = input("请输入链接:\n")
+        check_tip = check_url_site(content)
+        if check_tip is None:
+            sys.exit("不支持的网站")
+        args.url = content
+        args.site = check_tip
+    # 要么有url 要么有site和相关参数的组合
     if args.url != "":
         args.site = check_url_site(args.url)
-    if args.site == "":
-        args.site = input("请输入站点(qq/iqiyi/youku):\n")
+    elif args.site == "":
+        sys.exit("请传入链接或指定网站+视频相关的参数")
     if args.site == "qq":
         subtitles = qq(args)
     if args.site == "iqiyi":
         subtitles = iqiyi(args)
     if args.site == "youku":
         subtitles = youku(args)
+    if args.site == "sohu":
+        subtitles = sohu(args)
 
 if __name__ == "__main__":
     # 打包 --> pyinstaller GetDanMu.spec
diff --git a/README.md b/README.md
index f307414..04864ee 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,7 @@
-
 # GetDanMu
-转换/下载各类视频弹幕的工具
+[转换/下载各类视频弹幕的工具][1]
+
 项目主页:https://github.com/xhlove/GetDanMu
 
 ## 网站支持
@@ -15,6 +10,7 @@
 | **腾讯视频** | |✓|✓| |
 | **爱奇艺** | |✓|✓|✓|
 | **优酷** | |✓|✓|✓|
+| **搜狐视频** | |✓|✓| |
 
 ## 可能存在的问题
 - 下载进度接近100%时暂时没有反应
@@ -26,6 +22,11 @@
 # 更新日志
 
+## 2020/1/16
+- 增加搜狐视频的支持(剧集)
+- 改进输入提示(双击运行时)
+- 腾讯视频支持 -series 参数(尝试通过单集得到合集弹幕)
+
 ## 2020/1/11
 - 增加优酷弹幕下载,支持合集,支持通过单集直接下载合集弹幕(暂时仅限优酷)
 - 改进去重方式
@@ -35,4 +36,6 @@
 
 ## 2020/1/5
 - 增加了通过链接下载爱奇艺视频弹幕的方法,支持综艺合集。
-- 增加通过链接判断网站
\ No newline at end of file
+- 增加通过链接判断网站
+
+  [1]: https://blog.weimo.info/archives/431/
\ No newline at end of file
diff --git a/basic/vars.py b/basic/vars.py
index 7ecec75..c95bfbd 100644
--- a/basic/vars.py
+++ b/basic/vars.py
@@ -3,9 +3,12 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:14:35
-# 上次编辑时间: 2020-01-05 14:46:15
+# 上次编辑时间 : 2020-01-16 19:10:06
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
+
+ALLOW_SITES = ["qq", "iqiyi", "youku", "sohu"]
+
 qqlive = {
     "User-Agent":"qqlive"
     }
diff --git a/pfunc/cfunc.py b/pfunc/cfunc.py
index 73e970f..f099fff 100644
--- a/pfunc/cfunc.py
+++ b/pfunc/cfunc.py
@@ -3,13 +3,15 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-05 12:45:18
-# 上次编辑时间 : 2020-01-11 17:37:22
+# 上次编辑时间 : 2020-01-16 14:50:34
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 
 import hashlib
 from urllib.parse import urlparse
 
+from basic.vars import ALLOW_SITES
+
 def remove_same_danmu(comments: list):
     # 在原有基础上pop会引起索引变化 所以还是采用下面这个方式
     contents = []
@@ -23,7 +25,11 @@ def remove_same_danmu(comments: list):
     return contents
 
 def check_url_site(url):
-    return urlparse(url).netloc.split(".")[-2]
+    site = urlparse(url).netloc.split(".")[-2]
+    if site in ALLOW_SITES:
+        return site
+    else:
+        return None
 
 def check_url_locale(url):
     flag = {
diff --git a/pfunc/dump_to_ass.py b/pfunc/dump_to_ass.py
index 5af0fb3..61f9fd8 100644
--- a/pfunc/dump_to_ass.py
+++ b/pfunc/dump_to_ass.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:17:44
-# 上次编辑时间 : 2020-01-11 17:25:09
+# 上次编辑时间 : 2020-01-16 20:06:23
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 import os
@@ -31,12 +31,16 @@ def write_lines_to_file(ass_head, lines, file_path):
         for line in lines:
             f.write(line + "\n")
 
-def check_file(name, skip=False, fpath=os.getcwd()):
+def check_file(name, args, fpath=os.getcwd()):
     flag = True
     file_path = os.path.join(fpath, name + ".ass")
     if os.path.isfile(file_path):
-        if skip:
+        if args.y:
             os.remove(file_path)
+        elif args.series:
+            # 存在重复的 那么直接pass(认为已经下载好了)
+            flag = False
+            return flag, file_path
         else:
             isremove = input("{}已存在,是否覆盖?(y/n):".format(file_path))
             if isremove.strip() == "y":
diff --git a/pfunc/request_info.py b/pfunc/request_info.py
index 34a6e28..3a360ac 100644
--- a/pfunc/request_info.py
+++ b/pfunc/request_info.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:14:43
-# 上次编辑时间 : 2020-01-11 17:42:30
+# 上次编辑时间 : 2020-01-16 19:44:55
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 import re
@@ -40,6 +40,28 @@ def get_all_vids_by_column_id():
     # 综艺类型的
     pass
 
+def get_cid_by_vid(vid):
+    api_url = "http://union.video.qq.com/fcgi-bin/data"
+    params = {
+        "tid": "98",
+        "appid": "10001005",
+        "appkey": "0d1a9ddd94de871b",
+        "idlist": vid,
+        "otype":"json"
+    }
+    r = requests.get(api_url, params=params, headers=qqlive).content.decode("utf-8")
+    data = json.loads(r.lstrip("QZOutputJson=").rstrip(";"))
+    try:
+        fields = data["results"][0]["fields"]
+    except Exception as e:
+        print("load fields error info -->", e)
+        return None
+    if fields.get("sync_cover"):
+        return fields["sync_cover"]
+    elif fields.get("cover_list"):
+        return fields["cover_list"][0]
+    return
+
 def get_all_vids_by_cid(cid):
     api_url = "http://union.video.qq.com/fcgi-bin/data"
     params = {
diff --git a/sites/iqiyi.py b/sites/iqiyi.py
index e4c6236..d928692 100644
--- a/sites/iqiyi.py
+++ b/sites/iqiyi.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:14:41
-# 上次编辑时间 : 2020-01-11 17:23:32
+# 上次编辑时间 : 2020-01-16 19:58:51
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 
@@ -83,7 +83,7 @@ def main(args):
     subtitles = {}
     for name, duration, tvid in vinfos:
         print(name, "开始下载...")
-        flag, file_path = check_file(name, skip=args.y)
+        flag, file_path = check_file(name, args)
         if flag is False:
             print("跳过{}".format(name))
             continue
diff --git a/sites/qq.py b/sites/qq.py
index 0930226..3e291f5 100644
--- a/sites/qq.py
+++ b/sites/qq.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-04 19:14:37
-# 上次编辑时间 : 2020-01-11 17:25:34
+# 上次编辑时间 : 2020-01-16 20:04:51
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 
@@ -14,6 +14,7 @@ import requests
 
 from basic.vars import qqlive
 from pfunc.dump_to_ass import check_file, write_one_video_subtitles
+from pfunc.request_info import get_cid_by_vid
 from pfunc.request_info import get_all_vids_by_cid as get_vids
 from pfunc.request_info import get_danmu_target_id_by_vid as get_target_id
 
@@ -97,10 +98,10 @@ def get_danmu_by_target_id(vid: str, duration: int, target_id, font="微软雅黑", font_size=25):
 
     return comments
 
-def get_one_subtitle_by_vinfo(vinfo, font="微软雅黑", font_size=25, skip=False):
+def get_one_subtitle_by_vinfo(vinfo, font="微软雅黑", font_size=25, args=""):
     vid, name, duration, target_id = vinfo
     print(name, "开始下载...")
-    flag, file_path = check_file(name, skip=skip)
+    flag, file_path = check_file(name, args)
     if flag is False:
         print("跳过{}".format(name))
         return
@@ -108,7 +109,7 @@ def get_one_subtitle_by_vinfo(vinfo, font="微软雅黑", font_size=25, skip=Fal
     # print("{}弹幕下载完成!".format(name))
     return comments, file_path
 
-def ask_input(url=""):
+def ask_input(url="", isall=False):
     if url == "":
         url = input("请输入vid/coverid/链接,输入q退出:\n").strip()
         if url == "q" or url == "":
@@ -117,6 +118,9 @@ def ask_input(url=""):
     params = url.replace(".html", "").split("/")
     if params[-1].__len__() == 11:
         vids = [params[-1]]
+        if isall:
+            cid = get_cid_by_vid(params[-1])
+            vids += get_vids(cid)
     elif params[-1].__len__() == 15:
         cid = params[-1]
         vids = get_vids(cid)
@@ -132,6 +136,9 @@ def ask_input(url=""):
 
 def main(args):
     vids = []
+    isall = False
+    if args.series:
+        isall = True
     if args.cid and args.cid.__len__() == 15:
         vids += get_vids(args.cid)
     if args.vid:
@@ -141,16 +148,26 @@ def main(args):
             vids += [vid for vid in args.vid.strip().replace(" ", "").split(",") if vid.__len__() == 11]
         else:
             pass
+        if args.series:
+            cid = get_cid_by_vid(args.vid)
+            vids += get_vids(cid)
     if args.url:
-        vids += ask_input(url=args.url)
+        vids += ask_input(url=args.url, isall=isall)
     if args.vid == "" and args.cid == "" and args.url == "":
-        vids += ask_input()
+        vids += ask_input(isall=isall)
     if vids.__len__() <= 0:
         sys.exit("没有任何有效输入")
+    vids_bak = vids
+    vids = []
+    for vid in vids_bak:
+        if vid in vids:
+            continue
+        else:
+            vids.append(vid)
     vinfos = get_video_info_by_vid(vids)
     subtitles = {}
     for vinfo in vinfos:
-        infos = get_one_subtitle_by_vinfo(vinfo, args.font, args.font_size, args.y)
+        infos = get_one_subtitle_by_vinfo(vinfo, args.font, args.font_size, args=args)
         if infos is None:
             continue
         comments, file_path = infos
diff --git a/sites/sohu.py b/sites/sohu.py
new file mode 100644
index 0000000..68d4f18
--- /dev/null
+++ b/sites/sohu.py
@@ -0,0 +1,199 @@
+#!/usr/bin/env python3.7
+# coding=utf-8
+'''
+# 作者: weimo
+# 创建日期: 2020-01-16 17:45:35
+# 上次编辑时间 : 2020-01-16 20:09:22
+# 一个人的命运啊,当然要靠自我奋斗,但是...
+'''
+import json
+import requests
+
+from basic.vars import chrome
+from pfunc.request_info import matchit
+from pfunc.dump_to_ass import check_file, write_one_video_subtitles
+
+def try_decode(content):
+    flag = False
+    methods = ["gbk", "utf-8"]
+    for method in methods:
+        try:
+            content_decode = content.decode(method)
+        except Exception as e:
+            print("try {} decode method failed.".format(method))
+            continue
+        flag = True
+        break
+    if flag is True:
+        return content_decode
+    else:
+        return None
+
+def get_vinfos_by_url(url: str):
+    ep_url = matchit(["[\s\S]+?tv.sohu.com/v/(.+?)\.html", "[\s\S]+?tv.sohu.com/(.+?)/(.+?)\.html"], url)
+    aid_url = matchit(["[\s\S]+?tv.sohu.com/album/.(\d+)\.shtml"], url)
+    vid_url = matchit(["[\s\S]+?tv.sohu.com/v(\d+)\.shtml"], url)
+    if ep_url:
+        try:
+            r = requests.get(url, headers=chrome, timeout=3).content
+        except Exception as e:
+            print(e)
+            print("get sohu (url -> {}) ep url failed.".format(url))
+            return
+        r_decode = try_decode(r)
+        if r_decode is None:
+            print("ep response use decode failed(url -> {}).".format(url))
+            return None
+        vid = matchit(["[\s\S]+?var vid.+?(\d+)"], r_decode)
+        if vid:
+            vinfo = get_vinfo_by_vid(vid)
+            if vinfo is None:
+                return
+            else:
+                return [vinfo]
+        else:
+            print("match sohu vid (url -> {}) failed.".format(url))
+            return None
+    if aid_url:
+        return get_vinfos(aid_url)
+    if vid_url:
+        vinfo = get_vinfo_by_vid(vid_url)
+        if vinfo is None:
+            return
+        else:
+            return [vinfo]
+    if ep_url is None and aid_url is None and vid_url is None:
+        # 可能是合集页面
+        try:
+            r = requests.get(url, headers=chrome, timeout=3).content
+        except Exception as e:
+            print("get sohu (url -> {}) album url failed.".format(url))
+            return
+        r_decode = try_decode(r)
+        if r_decode is None:
+            print("album response decode failed(url -> {}).".format(url))
+            return None
+        aid = matchit(["[\s\S]+?var playlistId.+?(\d+)"], r_decode)
+        if aid:
+            return get_vinfos(aid)
+    return
+
+
+def get_vinfos(aid: str):
+    api_url = "https://pl.hd.sohu.com/videolist"
+    params = {
+        "callback": "",
+        "playlistid": aid,
+        "o_playlistId": "",
+        "pianhua": "0",
+        "pagenum": "1",
+        "pagesize": "999",
+        "order": "0", # 0 从小到大
+        "cnt": "1",
+        "pageRule": "2",
+        "withPgcVideo": "0",
+        "ssl": "0",
+        "preVideoRule": "3",
+        "_": "" # 1579167883430
+    }
+    try:
+        r = requests.get(api_url, params=params, headers=chrome, timeout=3).content.decode("gbk")
+    except Exception as e:
+        print("get sohu (aid -> {}) videolist failed.".format(aid))
+        return None
+    data = json.loads(r)
+    if data.get("videos"):
+        videos = data["videos"]
+    else:
+        print("videolist has no videos (aid -> {}).".format(aid))
+        return None
+    vinfos = [[video["name"], int(float(video["playLength"])), video["vid"], aid] for video in videos]
+    return vinfos
+
+
+def get_vinfo_by_vid(vid: str):
+    api_url = "https://hot.vrs.sohu.com/vrs_flash.action"
+    params = {
+        "vid": vid,
+        "ver": "31",
+        "ssl": "1",
+        "pflag": "pch5"
+    }
+    try:
+        r = requests.get(api_url, params=params, headers=chrome, timeout=3).content.decode("utf-8")
+    except Exception as e:
+        print("get sohu (vid -> {}) vinfo failed.".format(vid))
+        return None
+    data = json.loads(r)
+    if data.get("status") == 1:
+        aid = ""
+        if data.get("pid"):
+            aid = str(data["pid"])
+        if data.get("data"):
+            data = data["data"]
+        else:
+            print("vid -> {} vinfo request return no data.".format(vid))
+            return
+    else:
+        print("vid -> {} vinfo request return error.".format(vid))
+        return
+    return [data["tvName"], int(float(data["totalDuration"])), vid, aid]
+
+def get_danmu_all_by_vid(vid: str, aid: str, duration: int):
+    api_url = "https://api.danmu.tv.sohu.com/dmh5/dmListAll"
+    params = {
+        "act": "dmlist_v2",
+        "dct": "1",
+        "request_from": "h5_js",
+        "vid": vid,
+        "page": "1",
+        "pct": "2",
+        "from": "PlayerType.SOHU_VRS",
+        "o": "4",
+        "aid": aid,
+        "time_begin": "0",
+        "time_end": str(duration)
+    }
+    try:
+        r = requests.get(api_url, params=params, headers=chrome, timeout=3).content.decode("utf-8")
+    except Exception as e:
+        print("get sohu (vid -> {}) danmu failed.".format(vid))
+        return None
+    data = json.loads(r)["info"]["comments"]
+    comments = []
+    for comment in data:
+        comments.append([comment["c"], "ffffff", comment["v"]])
+    comments = sorted(comments, key=lambda _: _[-1])
+    return comments
+
+def main(args):
+    vinfos = []
+    if args.vid:
+        vi = get_vinfo_by_vid(args.vid)
+        if vi:
+            vinfos.append(vi)
+    if args.aid:
+        vi = get_vinfos(args.aid)
+        if vi:
+            vinfos += vi
+    if args.vid == "" and args.aid == "" and args.url == "":
+        args.url = input("请输入sohu链接:\n")
+    if args.url:
+        vi = get_vinfos_by_url(args.url)
+        if vi:
+            vinfos += vi
+    subtitles = {}
+    for name, duration, vid, aid in vinfos:
+        print(name, "开始下载...")
+        flag, file_path = check_file(name, args)
+        if flag is False:
+            print("跳过{}".format(name))
+            continue
+        comments = get_danmu_all_by_vid(vid, aid, duration)
+        if comments is None:
+            print(name, "弹幕获取失败了,记得重试~(@^_^@)~")
+            continue
+        comments = write_one_video_subtitles(file_path, comments, args)
+        subtitles.update({file_path:comments})
+        print(name, "下载完成!")
+    return subtitles
\ No newline at end of file
diff --git a/sites/youku.py b/sites/youku.py
index e1f18b6..86d34cf 100644
--- a/sites/youku.py
+++ b/sites/youku.py
@@ -3,7 +3,7 @@
 '''
 # 作者: weimo
 # 创建日期: 2020-01-05 14:52:21
-# 上次编辑时间 : 2020-01-11 17:53:14
+# 上次编辑时间 : 2020-01-16 19:59:08
 # 一个人的命运啊,当然要靠自我奋斗,但是...
 '''
 import re
@@ -119,7 +119,7 @@ def main(args):
     subtitles = {}
     for name, duration, video_id in vinfos:
         print(name, "开始下载...")
-        flag, file_path = check_file(name, skip=args.y)
+        flag, file_path = check_file(name, args=args)
         if flag is False:
             print("跳过{}".format(name))
             continue
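
The -series handling added to sites/qq.py reduces to two lookups in pfunc/request_info.py: resolve the episode's cover id with get_cid_by_vid, then expand it to every vid under that cover with get_all_vids_by_cid. A minimal standalone sketch of that flow, not part of the patch itself; the vid value is a placeholder, not a real id:

    from pfunc.request_info import get_cid_by_vid
    from pfunc.request_info import get_all_vids_by_cid

    vid = "abcdefghijk"                 # placeholder 11-character vid, illustration only
    cid = get_cid_by_vid(vid)           # cover id via the union.video.qq.com api, or None on failure
    vids = get_all_vids_by_cid(cid) if cid else [vid]
    print(vids)                         # every episode vid under the cover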
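
For the new sites/sohu.py module, the per-video pipeline used by main() is: resolve [name, duration, vid, aid] with get_vinfo_by_vid, then pull the full danmaku list with get_danmu_all_by_vid. A minimal sketch that calls those two helpers directly, assuming a placeholder vid (not a real id):

    from sites.sohu import get_vinfo_by_vid, get_danmu_all_by_vid

    def fetch_sohu_danmu(vid: str):
        vinfo = get_vinfo_by_vid(vid)            # [name, duration, vid, aid] or None
        if vinfo is None:
            return None
        name, duration, vid, aid = vinfo
        # list of [text, color, time] sorted by time, or None on failure
        return name, get_danmu_all_by_vid(vid, aid, duration)

    print(fetch_sohu_danmu("1234567"))           # "1234567" is a placeholder vid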
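
A side note on the vid de-duplication loop added to sites/qq.py main(): on Python 3.7 (the interpreter named in the repo's shebangs) an order-preserving equivalent is a one-liner. Shown only as an observation on the design choice, not as a change to the patch:

    vids = list(dict.fromkeys(vids))    # drops duplicates while keeping first-seen order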