5 Commits
1.2 ... 1.4

Author SHA1 Message Date
xhlove
7ce2a35be9 fix bug 2020-01-16 20:17:57 +08:00
xhlove
986ec2b9fe Add Sohu TV danmaku download and improve input prompts 2020-01-16 20:12:07 +08:00
xhlove
3cfccc1c3c readme update 2020-01-11 18:46:16 +08:00
xhlove
11edd6dfad Add Youku danmaku download and other improvements 2020-01-11 17:57:51 +08:00
xhlove
78083c2295 Add iQIYI URL-based parsing 2020-01-05 14:49:33 +08:00
15 changed files with 772 additions and 110 deletions

.gitignore

@@ -1,6 +1,7 @@
# extra
.vscode/
releases/
test/
*.ass
methods/calc_danmu_pos.py

GetDanMu.py

@@ -1,9 +1,9 @@
#!/usr/bin/env python
#!/usr/bin/env python3.7
# coding=utf-8
'''
# Author: weimo
# Created: 2020-01-04 12:59:11
# Last edited: 2020-01-04 20:14:39
# Created: 2020-01-04 19:14:39
# Last edited: 2020-01-16 20:15:52
# A person's fate, of course, depends on their own efforts, but...
'''
@@ -13,10 +13,9 @@ from argparse import ArgumentParser
from sites.qq import main as qq
from sites.iqiyi import main as iqiyi
from basic.ass import get_ass_head, check_font
from pfunc.dump_to_ass import write_lines_to_file
from methods.assbase import ASS
from methods.sameheight import SameHeight
from sites.youku import main as youku
from sites.sohu import main as sohu
from pfunc.cfunc import check_url_site
# -------------------------------------------
# basic workflow
@@ -29,31 +28,46 @@ from methods.sameheight import SameHeight
def main():
parser = ArgumentParser(description="Danmaku download/convert tool for video sites. For any issue contact vvtoolbox.dev@gmail.com")
parser = ArgumentParser(description="Danmaku download/convert tool for video sites. Project page: https://github.com/xhlove/GetDanMu. For any issue contact vvtoolbox.dev@gmail.com")
parser.add_argument("-f", "--font", default="微软雅黑", help="font for the output subtitles")
parser.add_argument("-fs", "--font-size", default=28, help="font size for the output subtitles")
parser.add_argument("-s", "--site", default="qq", help="specify the site")
parser.add_argument("-s", "--site", default="", help="specify the site")
parser.add_argument("-cid", "--cid", default="", help="download danmaku for the videos under this cid (Tencent Video collection)")
parser.add_argument("-vid", "--vid", default="", help="download danmaku for the video(s) with this vid; separate multiple vids with commas")
parser.add_argument("-aid", "--aid", default="", help="download danmaku for the videos under this aid (iQIYI collection)")
parser.add_argument("-tvid", "--tvid", default="", help="download danmaku for the video(s) with this tvid; separate multiple tvids with commas")
parser.add_argument("-series", "--series", action="store_true", help="try to fetch the whole collection's danmaku from a single episode")
parser.add_argument("-u", "--url", default="", help="download danmaku for the video the link points to")
parser.add_argument("-y", "--y", action="store_true", help="overwrite existing danmaku without asking")
parser.add_argument("-y", "--y", action="store_true", help="overwrite existing danmaku by default without asking")
args = parser.parse_args()
# print(args.__dict__)
font_path, font_style_name = check_font(args.font)
ass_head = get_ass_head(font_style_name, args.font_size)
init_args = sys.argv
imode = "command_line"
if init_args.__len__() == 1:
# when the exe is double-clicked or invoked directly, sys.argv holds only the exe path
# from the command line it gets the exe's relative path (when run inside the exe's directory) or its full path (when run from elsewhere)
# double-clicking the exe passes the full path
imode = "non_command_line"
if imode == "non_command_line":
content = input("Enter a link:\n")
check_tip = check_url_site(content)
if check_tip is None:
sys.exit("Unsupported site")
args.url = content
args.site = check_tip
# either a url is given, or a site plus the matching id options
if args.url != "":
args.site = check_url_site(args.url)
elif args.site == "":
sys.exit("Pass a link, or specify a site plus the video id options")
if args.site == "qq":
subtitles = qq(args)
if args.site == "iqiyi":
subtitles = iqiyi(args)
for file_path, comments in subtitles.items():
get_xy_obj = SameHeight("那就写这一句作为初始化测试吧!", font_path=font_path, font_size=args.font_size)
subtitle = ASS(file_path, get_xy_obj, font=font_style_name)
for comment in comments:
subtitle.create_new_line(comment)
write_lines_to_file(ass_head, subtitle.lines, file_path)
if args.site == "youku":
subtitles = youku(args)
if args.site == "sohu":
subtitles = sohu(args)
if __name__ == "__main__":
# build --> pyinstaller GetDanMu.spec

README.md

@@ -1,2 +1,41 @@
# GetDanMu
Convert/download danmaku from all kinds of video sites
[A tool to convert/download danmaku from all kinds of video sites][1]
Project page: https://github.com/xhlove/GetDanMu
## Supported sites
| Site | URL | Single episode? | Collection? | Variety collection? |
| :--: | :-- | :-----: | :-----: | :-----: |
| **Tencent Video** | <https://v.qq.com/> |✓|✓| |
| **iQIYI** | <https://www.iqiyi.com/> |✓|✓|✓|
| **Youku** | <https://v.youku.com/> |✓|✓|✓|
| **Sohu TV** | <https://tv.sohu.com/> |✓|✓| |
## Possible issues
- No visible progress when the download is close to 100%
  All danmaku are processed in one pass after fetching finishes; for long videos or videos with many danmaku this processing takes a while and is normal.
- A command combination does not do what you expected
  The current logic is not complete; if this happens, please report it to me.
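Typical invocations, as a sketch (the ids and URLs below are placeholders, and the entry script name GetDanMu.py is assumed from the pyinstaller spec):

```bash
# single video by link; the site is detected from the URL
python GetDanMu.py -u "https://v.qq.com/x/cover/xxxx/yyyy.html"

# a whole Tencent Video collection by its 15-character cid, overwriting existing .ass files
python GetDanMu.py -s qq -cid xxxxxxxxxxxxxxx -y

# fetch a whole collection's danmaku starting from a single episode
python GetDanMu.py -u "https://v.youku.com/v_show/id_XXXXXXXX.html" -series
```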
# Changelog
## 2020/1/16
- Added Sohu TV support (series)
- Improved the input prompt (when run by double-click)
- Tencent now supports the -series option
## 2020/1/11
- Added Youku danmaku download, with collection support; a whole collection's danmaku can be fetched from a single episode (Youku only for now)
- Improved de-duplication
- Youku video ids are referred to as vid; to download a collection, use a link or the `-series` option
- Added download progress output; this may later be changed to refresh on a single line
## 2020/1/5
- Added downloading iQIYI danmaku via a link, with variety-show collections supported.
- Added detecting the site from the link
[1]: https://blog.weimo.info/archives/431/

basic/ass.py

@@ -1,9 +1,9 @@
#!/usr/bin/env python
#!/usr/bin/env python3.7
# coding=utf-8
'''
# Author: weimo
# Created: 2020-01-04 13:05:23
# Last edited: 2020-01-04 15:52:11
# Created: 2020-01-04 19:14:46
# Last edited: 2020-01-11 17:20:21
# A person's fate, of course, depends on their own efforts, but...
'''
@@ -63,9 +63,3 @@ def check_font(font):
else:
pass
return font_path, font_style_name
def check_content(content: str, comments: list):
content = content.replace(" ", "")
if content in comments:
return
return content

basic/vars.py

@@ -1,17 +1,23 @@
#!/usr/bin/env python
#!/usr/bin/env python3.7
# coding=utf-8
'''
# Author: weimo
# Created: 2020-01-04 13:16:18
# Last edited: 2020-01-04 16:08:34
# Created: 2020-01-04 19:14:35
# Last edited: 2020-01-16 19:10:06
# A person's fate, of course, depends on their own efforts, but...
'''
ALLOW_SITES = ["qq", "iqiyi", "youku", "sohu"]
qqlive = {
"User-Agent":"qqlive"
}
iqiyiplayer = {
"User-Agent":"Qiyi List Client PC 7.2.102.1343"
}
chrome = {
"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36"
}
fonts = {
"微软雅黑":"msyh.ttc",
"微软雅黑粗体":"msyhbd.ttc",

methods/assbase.py

@@ -1,9 +1,9 @@
#!/usr/bin/env python
#!/usr/bin/env python3.7
# coding=utf-8
'''
# Author: weimo
# Created: 2020-01-04 13:01:04
# Last edited: 2020-01-04 15:42:02
# Created: 2020-01-04 19:14:32
# Last edited: 2020-01-05 14:46:27
# A person's fate, of course, depends on their own efforts, but...
'''

methods/sameheight.py

@@ -1,12 +1,13 @@
#!/usr/bin/env python
#!/usr/bin/env python3.7
# coding=utf-8
'''
# Author: weimo
# Created: 2019-12-25 20:35:43
# Last edited: 2019-12-25 23:23:32
# Created: 2020-01-04 19:14:47
# Last edited: 2020-01-05 14:46:51
# A person's fate, of course, depends on their own efforts, but...
'''
from PIL.ImageFont import truetype
class SameHeight(object):

pfunc/cfunc.py

@@ -0,0 +1,50 @@
#!/usr/bin/env python3.7
# coding=utf-8
'''
# Author: weimo
# Created: 2020-01-05 12:45:18
# Last edited: 2020-01-16 14:50:34
# A person's fate, of course, depends on their own efforts, but...
'''
import hashlib
from urllib.parse import urlparse
from basic.vars import ALLOW_SITES
def remove_same_danmu(comments: list):
    # pop() on the original list would shift the indexes, so build a new list instead
    # (a seen-set is also needed: the old string-in-list-of-lists check never matched)
    seen = set()
    result = []
    for comment in comments:
        content, color, timepoint = comment
        content = content.replace(" ", "")
        if content in seen:
            continue
        seen.add(content)
        result.append([content, color, timepoint])
    return result
def check_url_site(url):
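# e.g. "https://v.qq.com/x/cover/xxx.html" -> netloc "v.qq.com" -> "qq"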
site = urlparse(url).netloc.split(".")[-2]
if site in ALLOW_SITES:
return site
else:
return None
def check_url_locale(url):
flag = {
"cn":"zh_cn",
"tw":"zh_tw",
"intl":"intl"
}
if urlparse(url).netloc.split(".")[0] == "tw":
return flag["tw"]
else:
return flag["cn"]
def yk_msg_sign(msg: str):
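# md5 of the base64-encoded msg plus a fixed salt; used as the "sign" field in the danmaku request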
return hashlib.new("md5", bytes(msg + "MkmC9SoIw6xCkSKHhJ7b5D2r51kBiREr", "utf-8")).hexdigest()
def yk_t_sign(token, t, appkey, data):
text = "&".join([token, t, appkey, data])
return hashlib.new('md5', bytes(text, 'utf-8')).hexdigest()
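A quick sketch of how `yk_t_sign` is meant to be called (the token, timestamp and data values here are hypothetical placeholders; the appKey "24679788" is the one sites/youku.py sends):

```python
from hashlib import md5

token = "0123456789abcdef0123456789abcdef"  # first 32 chars of the _m_h5_tk cookie (hypothetical)
t = "1579167883430"                         # millisecond timestamp (hypothetical)
appkey = "24679788"                         # appKey used by sites/youku.py
data = '{"msg":"..."}'                      # request body being signed (placeholder)

# equivalent to yk_t_sign(token, t, appkey, data)
sign = md5("&".join([token, t, appkey, data]).encode("utf-8")).hexdigest()
```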

pfunc/dump_to_ass.py

@@ -1,26 +1,46 @@
#!/usr/bin/env python
#!/usr/bin/env python3.7
# coding=utf-8
'''
# Author: weimo
# Created: 2020-01-04 19:17:44
# Last edited: 2020-01-04 19:30:24
# Last edited: 2020-01-16 20:06:23
# A person's fate, of course, depends on their own efforts, but...
'''
import os
from basic.ass import get_ass_head, check_font
from methods.assbase import ASS
from methods.sameheight import SameHeight
from pfunc.cfunc import remove_same_danmu
def write_one_video_subtitles(file_path, comments, args):
# for a collection this check runs for every episode; it could move one level up, but it stays here in case a per-video font is specified later
font_path, font_style_name = check_font(args.font)
ass_head = get_ass_head(font_style_name, args.font_size)
get_xy_obj = SameHeight("那就写这一句作为初始化测试吧!", font_path=font_path, font_size=int(args.font_size))
subtitle = ASS(file_path, get_xy_obj, font=font_style_name)
comments = remove_same_danmu(comments)
for comment in comments:
subtitle.create_new_line(comment)
write_lines_to_file(ass_head, subtitle.lines, file_path)
return comments
def write_lines_to_file(ass_head, lines, file_path):
with open(file_path, "a+", encoding="utf-8") as f:
f.write(ass_head + "\n")
for line in lines:
f.write(line + "\n")
def check_file(name, skip=False, fpath=os.getcwd()):
def check_file(name, args, fpath=os.getcwd()):
flag = True
file_path = os.path.join(fpath, name + ".ass")
if os.path.isfile(file_path):
if skip:
if args.y:
os.remove(file_path)
elif args.series:
# the file already exists, so pass and treat it as downloaded
flag = False
return flag, file_path
else:
isremove = input("{} already exists, overwrite? (y/n)".format(file_path))
if isremove.strip() == "y":

pfunc/request_info.py

@@ -1,17 +1,18 @@
#!/usr/bin/env python
#!/usr/bin/env python3.7
# coding=utf-8
'''
# Author: weimo
# Created: 2020-01-04 13:15:25
# Last edited: 2020-01-04 17:47:16
# Created: 2020-01-04 19:14:43
# Last edited: 2020-01-16 19:44:55
# A person's fate, of course, depends on their own efforts, but...
'''
import re
import json
import requests
from basic.vars import qqlive, iqiyiplayer
from time import localtime
from pfunc.cfunc import check_url_locale
from basic.vars import qqlive, iqiyiplayer, chrome
# helpers that resolve one id into another id (or several ids)
@@ -26,7 +27,7 @@ def get_danmu_target_id_by_vid(vid: str):
try:
r = requests.get(api_url, params=params, headers=qqlive).content.decode("utf-8")
except Exception as e:
print("error info -->", e)
print("target_id requests error info -->", e)
return None
data = json.loads(r.lstrip("QZOutputJson=").rstrip(";"))
target_id = None
@@ -39,6 +40,28 @@ def get_all_vids_by_column_id():
# for variety shows
pass
def get_cid_by_vid(vid):
api_url = "http://union.video.qq.com/fcgi-bin/data"
params = {
"tid": "98",
"appid": "10001005",
"appkey": "0d1a9ddd94de871b",
"idlist": vid,
"otype":"json"
}
r = requests.get(api_url, params=params, headers=qqlive).content.decode("utf-8")
data = json.loads(r.lstrip("QZOutputJson=").rstrip(";"))
try:
cid = data["results"][0]["fields"]
except Exception as e:
print("load fields error info -->", e)
return None
if cid.get("sync_cover"):
return cid["sync_cover"]
elif cid.get("cover_list"):
return cid["cover_list"][0]
return
def get_all_vids_by_cid(cid):
api_url = "http://union.video.qq.com/fcgi-bin/data"
params = {
@@ -53,7 +76,7 @@ def get_all_vids_by_cid(cid):
try:
nomal_ids = json.loads(data["results"][0]["fields"]["nomal_ids"])
except Exception as e:
print("error info -->", e)
print("load nomal_ids error info -->", e)
return None
# F: 2 = free, 7 = VIP, 0 = trailer before the latest episode, 4 = trailer after the episode
vids = [item["V"] for item in nomal_ids if item["F"] in [2, 7]]
@@ -63,23 +86,192 @@ def get_all_vids_by_cid(cid):
#-------------------------------------------iqiyi--------------------------------------------
def get_vinfos(aid):
def get_vinfos(aid, locale="zh_cn"):
api_url = "http://cache.video.iqiyi.com/avlist/{}/0/".format(aid)
if locale != "zh_cn":
api_url += "?locale=" + locale
try:
r = requests.get(api_url, headers=iqiyiplayer).content.decode("utf-8")
r = requests.get(api_url, headers=chrome, timeout=5).content.decode("utf-8")
except Exception as e:
print("error info -->", e)
print("get_vinfos requests error info -->", e)
return None
data = json.loads(r[len("var videoListC="):])
try:
vlist = data["data"]["vlist"]
except Exception as e:
print("error info -->", e)
print("get_vinfos load vlist error info -->", e)
return None
vinfos = [[v["shortTitle"] + "_" + str(v["timeLength"]), v["timeLength"], ["id"]] for v in vlist]
return vinfos
def matchit(patterns, text):
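# return the first capture group of the first pattern that matches, or None if nothing matches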
ret = None
for pattern in patterns:
match = re.match(pattern, text)
if match:
ret = match.group(1)
break
return ret
def duration_to_sec(duration):
return sum(x * int(t) for x, t in zip([3600, 60, 1][2 - duration.count(":"):], duration.split(":")))
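# e.g. duration_to_sec("1:02:03") == 3723, duration_to_sec("02:03") == 123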
def get_year_range(aid, locale="zh_cn"):
# get the years of the first and the latest video and return the range as a list; on any error return the current year
year_start = year_end = localtime().tm_year
api_url = "http://pcw-api.iqiyi.com/album/album/baseinfo/{}".format(aid)
if locale != "zh_cn":
api_url += "?locale=" + locale
try:
r = requests.get(api_url, headers=chrome, timeout=5).content.decode("utf-8")
except Exception as e:
print("error info -->", e)
return list(range(year_start, year_end + 1))
data = json.loads(r)["data"]
if data.get("firstVideo"):
year_start = int(data["firstVideo"]["period"][:4])
if data.get("latestVideo"):
year_end = int(data["latestVideo"]["period"][:4])
return list(range(year_start, year_end + 1))
def get_vinfo_by_tvid(tvid, locale="zh_cn"):
api_url = "https://pcw-api.iqiyi.com/video/video/baseinfo/{}".format(tvid)
if locale != "zh_cn":
api_url += "?locale=" + locale
try:
r = requests.get(api_url, headers=chrome, timeout=5).content.decode("utf-8")
except Exception as e:
print("error info -->", e)
return
data = json.loads(r)["data"]
if data.__class__ != dict:
return None
name = data["name"]
duration = data["durationSec"]
return [name + "_" + str(duration), duration, tvid]
def get_vinfos_by_year(aid, years: list, cid=6, locale="zh_cn"):
api_url = "https://pcw-api.iqiyi.com/album/source/svlistinfo?cid={}&sourceid={}&timelist={}".format(cid, aid, ",".join([str(_) for _ in years.copy()]))
if locale != "zh_cn":
api_url += "&locale=" + locale
try:
r = requests.get(api_url, headers=chrome, timeout=5).content.decode("utf-8")
except Exception as e:
print("get_vinfos_by_year error info -->", e)
return None
data = json.loads(r)["data"]
vinfos = []
for year in years:
if year.__class__ != str:
year = str(year)
if data.get(year) is None:
continue
for ep in data[year]:
sec = duration_to_sec(ep["duration"])
vinfos.append([ep["shortTitle"] + "_" + str(sec), sec, ep["tvId"]])
return vinfos
def get_vinfos_by_url(url):
pass
locale = check_url_locale(url)
patterns = [".+?/w_(\w+?).html", ".+?/v_(\w+?).html", ".+?/a_(\w+?).html", ".+?/lib/m_(\w+?).html"]
isw, isep, isas, isms = [re.match(pattern, url) for pattern in patterns]
if isw is None and isep is None and isas is None and isms is None:
return None
try:
r = requests.get(url, headers=chrome, timeout=5).content.decode("utf-8")
except Exception as e:
print("get_vinfos_by_url error info -->", e)
return None
cid_patterns = ["[\s\S]+?\.cid.+?(\d+)", "[\s\S]+?cid: \"(\d+)\"", "[\s\S]+?channelID.+?\"(\d+)\""]
cid = matchit(cid_patterns, r)
aid_patterns = ["[\s\S]+?aid:'(\d+)'", "[\s\S]+?albumid=\"(\d+)\"", "[\s\S]+?movlibalbumaid=\"(\d+)\"", "[\s\S]+?data-score-tvid=\"(\d+)\""]
aid = matchit(aid_patterns, r)
tvid_patterns = ["[\s\S]+?\"tvid\":\"(\d+)\"", "[\s\S]+?\['tvid'\].+?\"(\d+)\""]
tvid = matchit(tvid_patterns, r)
if cid is None:
cid = ""
elif cid == "6" and isas or isms:#对于综艺合集需要获取年份
# year_patterns = ["[\s\S]+?datePublished.+?(\d\d\d\d)-\d\d-\d\d", "[\s\S]+?data-year=\"(\d+)\""]
# year = matchit(year_patterns, r)
# if year is None:
# years = [localtime().tm_year]
# else:
# years = [year]
years = get_year_range(aid, locale=locale)
else:
pass  # no other cases need special handling for now
if isep or isw:
if tvid is None:
return
return get_vinfo_by_tvid(tvid, locale=locale)
if isas or isms:
if aid is None:
return
if cid == "6":
return get_vinfos_by_year(aid, years, locale=locale)
else:
return get_vinfos(aid, locale=locale)
#-------------------------------------------iqiyi--------------------------------------------
#-------------------------------------------youku--------------------------------------------
def get_vinfos_by_url_youku(url, isall=False):
vid_patterns = ["[\s\S]+?youku.com/video/id_(.+?)\.html", "[\s\S]+?youku.com/v_show/id_(.+?)\.html"]
video_id = matchit(vid_patterns, url)
show_id_patterns = ["[\s\S]+?youku.com/v_nextstage/id_(.+?)\.html", "[\s\S]+?youku.com/show/id_z(.+?)\.html", "[\s\S]+?youku.com/show_page/id_z(.+?)\.html", "[\s\S]+?youku.com/alipay_video/id_(.+?)\.html"]
show_id = matchit(show_id_patterns, url)
if video_id is None and show_id is None:
return None
if video_id:
return get_vinfos_by_video_id(video_id, isall=isall)
if show_id.__len__() == 20 and show_id == show_id.lower():
return get_vinfos_by_show_id(show_id)
else:
return get_vinfos_by_video_id(show_id, isall=isall)
def get_vinfos_by_video_id(video_id, isall=False):
api_url = "https://openapi.youku.com/v2/videos/show.json?client_id=53e6cc67237fc59a&package=com.huawei.hwvplayer.youku&ext=show&video_id={}".format(video_id)
try:
r = requests.get(api_url, headers=chrome, timeout=5).content.decode("utf-8")
except Exception as e:
print("get_vinfos_by_video_id error info -->", e)
return None
data = json.loads(r)
if isall:
show_id = data["show"]["id"]
return get_vinfos_by_show_id(show_id)
duration = 0
if data.get("duration"):
duration = int(float(data["duration"]))
if data.get("title"):
name = data["title"] + "_" + str(duration)
else:
name = "优酷未知" + "_" + str(duration)
vinfo = [name, duration, video_id]
return [vinfo]
def get_vinfos_by_show_id(show_id):
api_url = "https://openapi.youku.com/v2/shows/videos.json?show_videotype=正片&count=100&client_id=53e6cc67237fc59a&page=1&show_id={}&package=com.huawei.hwvplayer.youku".format(show_id)
try:
r = requests.get(api_url, headers=chrome, timeout=5).content.decode("utf-8")
except Exception as e:
print("get_vinfos_by_show_id error info -->", e)
return None
data = json.loads(r)["videos"]
if data.__len__() == 0:
return None
vinfos = []
for video in data:
duration = 0
if video.get("duration"):
duration = int(float(video["duration"]))
if video.get("title"):
name = video["title"] + "_" + str(duration)
else:
name = "优酷未知_{}".format(video["id"]) + "_" + str(duration)
vinfos.append([name, duration, video["id"]])
return vinfos
#-------------------------------------------youku--------------------------------------------

requirements.txt

@@ -0,0 +1,3 @@
requests==2.22.0
Pillow==7.0.0
xmltodict==0.12.0

sites/iqiyi.py

@@ -1,9 +1,9 @@
#!/usr/bin/env python
#!/usr/bin/env python3.7
# coding=utf-8
'''
# Author: weimo
# Created: 2019-12-18 09:48:36
# Last edited: 2020-01-04 17:54:46
# Created: 2020-01-04 19:14:41
# Last edited: 2020-01-16 19:58:51
# A person's fate, of course, depends on their own efforts, but...
'''
@@ -14,25 +14,10 @@ from zlib import decompress
from xmltodict import parse
from basic.vars import iqiyiplayer
from basic.ass import check_content
from pfunc.dump_to_ass import check_file
from pfunc.request_info import get_vinfos
from pfunc.dump_to_ass import check_file, write_one_video_subtitles
from pfunc.request_info import get_vinfos, get_vinfos_by_url, get_vinfo_by_tvid
def get_vinfo_by_tvid(tvid):
api_url = "https://pcw-api.iqiyi.com/video/video/baseinfo/{}".format(tvid)
try:
r = requests.get(api_url, headers=iqiyiplayer).content.decode("utf-8")
except Exception as e:
print("error info -->", e)
return
data = json.loads(r)["data"]
if data.__class__ != dict:
return None
name = data["name"]
duration = data["durationSec"]
return [name + "_" + str(duration), duration, tvid]
def get_danmu_by_tvid(name, duration, tvid):
# http://cmts.iqiyi.com/bullet/41/00/10793494100_300_3.z
if tvid.__class__ == int:
@@ -50,7 +35,11 @@ def get_danmu_by_tvid(name, duration, tvid):
except Exception as e:
print("error info -->", e)
continue
try:
raw_xml = decompress(bytearray(r), 15+32).decode('utf-8')
except Exception as e:
index += 1
continue
try:
entry = parse(raw_xml)["danmu"]["data"]["entry"]
except Exception as e:
@@ -58,21 +47,17 @@ def get_danmu_by_tvid(name, duration, tvid):
continue
# with open("raw_xml.json", "w", encoding="utf-8") as f:
# f.write(json.dumps(parse(raw_xml), ensure_ascii=False, indent=4))
contents = []
if entry.__class__ != list:
entry = [entry]
for comment in entry:
if comment.get("list") is None:
continue
bulletInfo = comment["list"]["bulletInfo"]
if bulletInfo.__class__ != list:
bulletInfo = [bulletInfo]
for info in bulletInfo:
content = check_content(info["content"], contents)
if content is None:
continue
else:
contents.append(content)
color = [info["color"]]
comments.append([content, color, int(comment["int"])])
comments.append([info["content"], color, int(comment["int"])])
print("已下载{:.2f}%".format(index * timestamp * 100 / duration))
index += 1
comments = sorted(comments, key=lambda _: _[-1])
@@ -89,17 +74,20 @@ def main(args):
vi = get_vinfos(args.aid)
if vi:
vinfos += vi
# if args.url:
# vi = get_vinfos_by_url(args.url)
# if vi:
# vinfos += vi
if args.tvid == "" and args.aid == "" and args.url == "":
args.url = input("请输入iqiyi链接\n")
if args.url:
vi = get_vinfos_by_url(args.url)
if vi:
vinfos += vi
subtitles = {}
for name, duration, tvid in vinfos:
print(name, "开始下载...")
flag, file_path = check_file(name, skip=args.y)
flag, file_path = check_file(name, args)
if flag is False:
print("跳过{}".format(name))
return
continue
comments = get_danmu_by_tvid(name, duration, tvid)
comments = write_one_video_subtitles(file_path, comments, args)
subtitles.update({file_path:comments})
return subtitles

sites/qq.py

@@ -1,9 +1,9 @@
#!/usr/bin/env python
#!/usr/bin/env python3.7
# coding=utf-8
'''
# Author: weimo
# Created: 2019-12-18 09:37:15
# Last edited: 2020-01-04 17:53:28
# Created: 2020-01-04 19:14:37
# Last edited: 2020-01-16 20:04:51
# A person's fate, of course, depends on their own efforts, but...
'''
@@ -13,8 +13,8 @@ import json
import requests
from basic.vars import qqlive
from basic.ass import check_content
from pfunc.dump_to_ass import check_file
from pfunc.dump_to_ass import check_file, write_one_video_subtitles
from pfunc.request_info import get_cid_by_vid
from pfunc.request_info import get_all_vids_by_cid as get_vids
from pfunc.request_info import get_danmu_target_id_by_vid as get_target_id
@@ -80,13 +80,7 @@ def get_danmu_by_target_id(vid: str, duration: int, target_id, font="微软雅
# timestamp unchanged; try once more
continue
danmu_count = danmu["count"]
contents = []
for comment in danmu["comments"]:
content = check_content(comment["content"], contents)
if content is None:
continue
else:
contents.append(content)
if comment["content_style"]:
style = json.loads(comment["content_style"])
if style.get("gradient_colors"):
@@ -97,17 +91,17 @@ def get_danmu_by_target_id(vid: str, duration: int, target_id, font="微软雅
color = ["ffffff"]
else:
color = ["ffffff"]
comments.append([content, color, comment["timepoint"]])
comments.append([comment["content"], color, comment["timepoint"]])
print("已下载{:.2f}%".format(params["timestamp"]*100/duration))
params["timestamp"] += 30
comments = sorted(comments, key=lambda _: _[-1])
return comments
def get_one_subtitle_by_vinfo(vinfo, font="微软雅黑", font_size=25, skip=False):
def get_one_subtitle_by_vinfo(vinfo, font="微软雅黑", font_size=25, args=""):
vid, name, duration, target_id = vinfo
print(name, "开始下载...")
flag, file_path = check_file(name, skip=skip)
flag, file_path = check_file(name, args)
if flag is False:
print("跳过{}".format(name))
return
@@ -115,7 +109,7 @@ def get_one_subtitle_by_vinfo(vinfo, font="微软雅黑", font_size=25, skip=False):
# print("{}弹幕下载完成!".format(name))
return comments, file_path
def ask_input(url=""):
def ask_input(url="", isall=False):
if url == "":
url = input("Enter a vid/coverid/link (q to quit)\n").strip()
if url == "q" or url == "":
@@ -124,6 +118,9 @@ def ask_input(url=""):
params = url.replace(".html", "").split("/")
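# an 11-character id is a vid (single episode); a 15-character id is a cid (collection)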
if params[-1].__len__() == 11:
vids = [params[-1]]
if isall:
cid = get_cid_by_vid(params[-1])
vids += get_vids(cid)
elif params[-1].__len__() == 15:
cid = params[-1]
vids = get_vids(cid)
@@ -139,6 +136,9 @@ def ask_input(url=""):
def main(args):
vids = []
isall = False
if args.series:
isall = True
if args.cid and args.cid.__len__() == 15:
vids += get_vids(args.cid)
if args.vid:
@@ -148,16 +148,30 @@ def main(args):
vids += [vid for vid in args.vid.strip().replace(" ", "").split(",") if vid.__len__() == 11]
else:
pass
if args.series:
cid = get_cid_by_vid(args.vid)
vids += get_vids(cid)
if args.url:
vids += ask_input(url=args.url)
vids += ask_input(url=args.url, isall=isall)
if args.vid == "" and args.cid == "" and args.url == "":
vids += ask_input()
vids += ask_input(isall=isall)
if vids.__len__() <= 0:
sys.exit("没有任何有效输入")
vids_bak = vids
vids = []
for vid in vids_bak:
if vid in vids:
continue
else:
vids.append(vid)
vinfos = get_video_info_by_vid(vids)
subtitles = {}
for vinfo in vinfos:
comments, file_path = get_one_subtitle_by_vinfo(vinfo, args.font, args.font_size, args.y)
infos = get_one_subtitle_by_vinfo(vinfo, args.font, args.font_size, args=args)
if infos is None:
continue
comments, file_path = infos
comments = write_one_video_subtitles(file_path, comments, args)
subtitles.update({file_path:comments})
return subtitles

sites/sohu.py

@@ -0,0 +1,199 @@
#!/usr/bin/env python3.7
# coding=utf-8
'''
# Author: weimo
# Created: 2020-01-16 17:45:35
# Last edited: 2020-01-16 20:09:22
# A person's fate, of course, depends on their own efforts, but...
'''
import json
import requests
from basic.vars import chrome
from pfunc.request_info import matchit
from pfunc.dump_to_ass import check_file, write_one_video_subtitles
def try_decode(content):
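# Sohu pages come back as either gbk or utf-8; try each in turn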
flag = False
methods = ["gbk", "utf-8"]
for method in methods:
try:
content_decode = content.decode(method)
except Exception as e:
print("try {} decode method failed.".format(method))
continue
flag = True
break
if flag is True:
return content_decode
else:
return None
def get_vinfos_by_url(url: str):
ep_url = matchit(["[\s\S]+?tv.sohu.com/v/(.+?)\.html", "[\s\S]+?tv.sohu.com/(.+?)/(.+?)\.html"], url)
aid_url = matchit(["[\s\S]+?tv.sohu.com/album/.(\d+)\.shtml"], url)
vid_url = matchit(["[\s\S]+?tv.sohu.com/v(\d+)\.shtml"], url)
if ep_url:
try:
r = requests.get(url, headers=chrome, timeout=3).content
except Exception as e:
print(e)
print("get sohu (url -> {}) ep url failed.".format(url))
return
r_decode = try_decode(r)
if r_decode is None:
print("ep response use decode failed(url -> {}).".format(url))
return None
vid = matchit(["[\s\S]+?var vid.+?(\d+)"], r_decode)
if vid:
vinfo = get_vinfo_by_vid(vid)
if vinfo is None:
return
else:
return [vinfo]
else:
print("match sohu vid (url -> {}) failed.".format(url))
return None
if aid_url:
return get_vinfos(aid_url)
if vid_url:
vinfo = get_vinfo_by_vid(vid_url)
if vinfo is None:
return
else:
return [vinfo]
if ep_url is None and aid_url is None and vid_url is None:
# this may be a collection page
try:
r = requests.get(url, headers=chrome, timeout=3).content
except Exception as e:
print("get sohu (url -> {}) album url failed.".format(url))
return
r_decode = try_decode(r)
if r_decode is None:
print("album response decode failed(url -> {}).".format(url))
return None
aid = matchit(["[\s\S]+?var playlistId.+?(\d+)"], r_decode)
if aid:
return get_vinfos(aid)
return
def get_vinfos(aid: str):
api_url = "https://pl.hd.sohu.com/videolist"
params = {
"callback": "",
"playlistid": aid,
"o_playlistId": "",
"pianhua": "0",
"pagenum": "1",
"pagesize": "999",
"order": "0", # 0 从小到大
"cnt": "1",
"pageRule": "2",
"withPgcVideo": "0",
"ssl": "0",
"preVideoRule": "3",
"_": "" # 1579167883430
}
try:
r = requests.get(api_url, params=params, headers=chrome, timeout=3).content.decode("gbk")
except Exception as e:
print("get sohu (vid -> {}) videolist failed.".format(vid))
return None
data = json.loads(r)
if data.get("videos"):
videos = data["videos"]
else:
print("videolist has no videos (aid -> {}).".format(aid))
return None
vinfos = [[video["name"], int(float(video["playLength"])), video["vid"], aid] for video in videos]
return vinfos
def get_vinfo_by_vid(vid: str):
api_url = "https://hot.vrs.sohu.com/vrs_flash.action"
params = {
"vid": vid,
"ver": "31",
"ssl": "1",
"pflag": "pch5"
}
try:
r = requests.get(api_url, params=params, headers=chrome, timeout=3).content.decode("utf-8")
except Exception as e:
print("get sohu (vid -> {}) vinfo failed.".format(vid))
return None
data = json.loads(r)
if data.get("status") == 1:
aid = ""
if data.get("pid"):
aid = str(data["pid"])
if data.get("data"):
data = data["data"]
else:
print("vid -> {} vinfo request return no data.".format(vid))
return
else:
print("vid -> {} vinfo request return error.".format(vid))
return
return [data["tvName"], int(float(data["totalDuration"])), vid, aid]
def get_danmu_all_by_vid(vid: str, aid: str, duration: int):
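# dmListAll returns the vid's full danmaku list in one request (time_begin=0 .. time_end=duration)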
api_url = "https://api.danmu.tv.sohu.com/dmh5/dmListAll"
params = {
"act": "dmlist_v2",
"dct": "1",
"request_from": "h5_js",
"vid": vid,
"page": "1",
"pct": "2",
"from": "PlayerType.SOHU_VRS",
"o": "4",
"aid": aid,
"time_begin": "0",
"time_end": str(duration)
}
try:
r = requests.get(api_url, params=params, headers=chrome, timeout=3).content.decode("utf-8")
except Exception as e:
print("get sohu (vid -> {}) danmu failed.".format(vid))
return None
data = json.loads(r)["info"]["comments"]
comments = []
for comment in data:
comments.append([comment["c"], "ffffff", comment["v"]])
comments = sorted(comments, key=lambda _: _[-1])
return comments
def main(args):
vinfos = []
if args.vid:
vi = get_vinfo_by_vid(args.vid)
if vi:
vinfos.append(vi)
if args.aid:
vi = get_vinfos(args.aid)
if vi:
vinfos += vi
if args.vid == "" and args.aid == "" and args.url == "":
args.url = input("请输入sohu链接\n")
if args.url:
vi = get_vinfos_by_url(args.url)
if vi:
vinfos += vi
subtitles = {}
for name, duration, vid, aid in vinfos:
print(name, "开始下载...")
flag, file_path = check_file(name, args)
if flag is False:
print("跳过{}".format(name))
continue
comments = get_danmu_all_by_vid(vid, aid, duration)
if comments is None:
print(name, "弹幕获取失败了,记得重试~(@^_^@)~")
continue
comments = write_one_video_subtitles(file_path, comments, args)
subtitles.update({file_path:comments})
print(name, "下载完成!")
return subtitles

sites/youku.py

@@ -0,0 +1,141 @@
#!/usr/bin/env python3.7
# coding=utf-8
'''
# Author: weimo
# Created: 2020-01-05 14:52:21
# Last edited: 2020-01-16 19:59:08
# A person's fate, of course, depends on their own efforts, but...
'''
import re
import time
import json
import base64
import requests
from basic.vars import chrome
from pfunc.dump_to_ass import check_file, write_one_video_subtitles
from pfunc.cfunc import yk_msg_sign, yk_t_sign
from pfunc.request_info import get_vinfos_by_show_id, get_vinfos_by_video_id, get_vinfos_by_url_youku
def get_tk_enc():
"""
Fetch Youku's _m_h5_tk and _m_h5_tk_enc cookies
"""
api_url = "https://acs.youku.com/h5/mtop.com.youku.aplatform.weakget/1.0/?jsv=2.5.1&appKey=24679788"
try:
r = requests.get(api_url, headers=chrome, timeout=5)
except Exception as e:
return
tk_enc = dict(r.cookies)
if tk_enc.get("_m_h5_tk_enc") and tk_enc.get("_m_h5_tk"):
return tk_enc
return
def get_cna():
api_url = "https://log.mmstat.com/eg.js"
try:
r = requests.get(api_url, headers=chrome, timeout=5)
except Exception as e:
return
cookies = dict(r.cookies)
if cookies.get("cna"):
return cookies["cna"]
return
def get_danmu_by_mat(vid, cna, mat: int, comments: list):
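# mat is the 1-based minute bucket of the video; the caller iterates over duration // 60 + 1 buckets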
api_url = "https://acs.youku.com/h5/mopen.youku.danmu.list/1.0/"
tm = str(int(time.time() * 1000))
msg = {
"ctime": tm,
"ctype": 10004,
"cver": "v1.0",
"guid": cna,
"mat": mat,
"mcount": 1,
"pid": 0,
"sver": "3.1.0",
"type": 1,
"vid": vid}
msg_b64encode = base64.b64encode(json.dumps(msg, separators=(',', ':')).encode("utf-8")).decode("utf-8")
msg.update({"msg":msg_b64encode})
msg.update({"sign":yk_msg_sign(msg_b64encode)})
# testing shows the _m_h5_tk and _m_h5_tk_enc cookies are all that's needed
tk_enc = get_tk_enc()
if tk_enc is None:
return
headers = {
"Content-Type":"application/x-www-form-urlencoded",
"Cookie":";".join([k + "=" + v for k, v in tk_enc.items()]),
"Referer": "https://v.youku.com"
}
headers.update(chrome)
t = str(int(time.time() * 1000))
data = json.dumps(msg, separators=(',', ':'))
params = {
"jsv":"2.5.6",
"appKey":"24679788",
"t":t,
"sign":yk_t_sign(tk_enc["_m_h5_tk"][:32], t, "24679788", data),
"api":"mopen.youku.danmu.list",
"v":"1.0",
"type":"originaljson",
"dataType":"jsonp",
"timeout":"20000",
"jsonpIncPrefix":"utility"
}
try:
r = requests.post(api_url, params=params, data={"data":data}, headers=headers, timeout=5).content.decode("utf-8")
except Exception as e:
print("youku danmu request failed.", e)
return "once again"
result = json.loads(json.loads(r)["data"]["result"])["data"]["result"]
for item in result:
comment = item["content"]
c_int = json.loads(item["propertis"])["color"]
if c_int.__class__ == str:
c_int = int(c_int)
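# the color arrives as a decimal int; format it as a 6-digit hex RGB string (playat is in milliseconds)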
color = hex(c_int)[2:].zfill(6)
timepoint = item["playat"] / 1000
comments.append([comment, [color], timepoint])
return comments
def main(args):
cna = get_cna()
if cna is None:
# checked up front to avoid doing useless work
return
isall = False
if args.series:
isall = True
vinfos = []
if args.url:
vi = get_vinfos_by_url_youku(args.url, isall=isall)
if vi:
vinfos += vi
if args.vid:
vi = get_vinfos_by_video_id(args.vid, isall=isall)
if vi:
vinfos += vi
subtitles = {}
for name, duration, video_id in vinfos:
print(name, "开始下载...")
flag, file_path = check_file(name, args=args)
if flag is False:
print("跳过{}".format(name))
continue
max_mat = duration // 60 + 1
comments = []
for mat in range(max_mat):
result = get_danmu_by_mat(video_id, cna, mat + 1, comments)
if result is None:
continue
elif result == "once again":
# a while loop might be better here
result = get_danmu_by_mat(video_id, cna, mat + 1, comments)
if result is None:
continue
comments = result
print("已下载{}/{}".format(mat + 1, max_mat))
comments = write_one_video_subtitles(file_path, comments, args)
subtitles.update({file_path:comments})
return subtitles