12 Commits
1.2 ... master

Author SHA1 Message Date
xhlove
6ab886dd8f iqiyi bug fix 2020-02-08 21:40:42 +08:00
xhlove
71c0212fe5 bug fix 2020-02-07 19:27:13 +08:00
xhlove
e5de08605c readme update 2020-02-07 19:11:28 +08:00
xhlove
1c31057b9d 完善说明,爱奇艺支持series命令,增加弹幕区间设定 2020-02-07 19:05:58 +08:00
xhlove
27de5ce4a3 readme update 2020-01-28 20:18:42 +08:00
xhlove
257b9655f0 新增芒果TV支持 2020-01-28 20:12:30 +08:00
xhlove
0bd66c894e iqiyi ep bug fix 2020-01-21 12:40:11 +08:00
xhlove
7ce2a35be9 fix bug 2020-01-16 20:17:57 +08:00
xhlove
986ec2b9fe 增加搜狐视频弹幕下载并改进输入提示 2020-01-16 20:12:07 +08:00
xhlove
3cfccc1c3c readme update 2020-01-11 18:46:16 +08:00
xhlove
11edd6dfad 增加优酷弹幕下载以及其他改进 2020-01-11 17:57:51 +08:00
xhlove
78083c2295 增加爱奇艺链接方式解析 2020-01-05 14:49:33 +08:00
17 changed files with 1106 additions and 122 deletions

1
.gitignore vendored
View File

@@ -1,6 +1,7 @@
# 额外
.vscode/
releases/
test/
*.ass
methods/calc_danmu_pos.py

View File

@@ -1,9 +1,9 @@
#!/usr/bin/env python
#!/usr/bin/env python3.7
# coding=utf-8
'''
# 作者: weimo
# 创建日期: 2020-01-04 12:59:11
# 上次编辑时间 : 2020-01-04 20:14:39
# 创建日期: 2020-01-04 19:14:39
# 上次编辑时间 : 2020-02-07 19:10:02
# 一个人的命运啊,当然要靠自我奋斗,但是...
'''
@@ -13,10 +13,12 @@ from argparse import ArgumentParser
from sites.qq import main as qq
from sites.iqiyi import main as iqiyi
from basic.ass import get_ass_head, check_font
from pfunc.dump_to_ass import write_lines_to_file
from methods.assbase import ASS
from methods.sameheight import SameHeight
from sites.youku import main as youku
from sites.sohu import main as sohu
from sites.mgtv import main as mgtv
from pfunc.cfunc import check_url_site
from basic.vars import ALLOW_SITES
# -------------------------------------------
# 基本流程
@@ -29,31 +31,49 @@ from methods.sameheight import SameHeight
def main():
parser = ArgumentParser(description="视频网站弹幕转换/下载工具任何问题请联系vvtoolbox.dev@gmail.com")
parser = ArgumentParser(description="视频网站弹幕转换/下载工具,项目地址https://github.com/xhlove/GetDanMu任何问题请联系vvtoolbox.dev@gmail.com")
parser.add_argument("-f", "--font", default="微软雅黑", help="指定输出字幕字体")
parser.add_argument("-fs", "--font-size", default=28, help="指定输出字幕字体大小")
parser.add_argument("-s", "--site", default="qq", help="指定网站")
parser.add_argument("-cid", "--cid", default="", help="下载cid对应视频的弹幕腾讯视频合集")
parser.add_argument("-s", "--site", default="", help=f"使用非url方式下载需指定网站 支持的网站 -> {' '.join(ALLOW_SITES)}")
parser.add_argument("-r", "--range", default="0,720", help="指定弹幕的纵向范围 默认0到720 请用逗号隔开")
parser.add_argument("-cid", "--cid", default="", help="下载cid对应视频的弹幕腾讯 芒果视频合集)")
parser.add_argument("-vid", "--vid", default="", help="下载vid对应视频的弹幕支持同时多个vid需要用逗号隔开")
parser.add_argument("-aid", "--aid", default="", help="下载aid对应视频的弹幕爱奇艺合集")
parser.add_argument("-tvid", "--tvid", default="", help="下载tvid对应视频的弹幕支持同时多个tvid需要用逗号隔开")
parser.add_argument("-series", "--series", action="store_true", help="尝试通过单集得到合集的全部弹幕")
parser.add_argument("-u", "--url", default="", help="下载视频链接所指向视频的弹幕")
parser.add_argument("-y", "--y", action="store_true", help="覆盖原有弹幕而不提示")
parser.add_argument("-y", "--y", action="store_true", help="默认覆盖原有弹幕而不提示")
args = parser.parse_args()
# print(args.__dict__)
font_path, font_style_name = check_font(args.font)
ass_head = get_ass_head(font_style_name, args.font_size)
init_args = sys.argv
imode = "command_line"
if init_args.__len__() == 1:
# 双击运行或命令执行exe文件时 传入参数只有exe的路径
# 命令行下执行会传入exe的相对路径在exe所在路径执行时 传入完整路径非exe所在路径下执行
# 双击运行exe传入完整路径
imode = "non_command_line"
if imode == "non_command_line":
content = input("请输入链接:\n")
check_tip = check_url_site(content)
if check_tip is None:
sys.exit("不支持的网站")
args.url = content
args.site = check_tip
# 要么有url 要么有site和相关参数的组合
if args.url != "":
args.site = check_url_site(args.url)
elif args.site == "":
sys.exit("请传入链接或指定网站+视频相关的参数")
if args.site == "qq":
subtitles = qq(args)
if args.site == "iqiyi":
subtitles = iqiyi(args)
for file_path, comments in subtitles.items():
get_xy_obj = SameHeight("那就写这一句作为初始化测试吧!", font_path=font_path, font_size=args.font_size)
subtitle = ASS(file_path, get_xy_obj, font=font_style_name)
for comment in comments:
subtitle.create_new_line(comment)
write_lines_to_file(ass_head, subtitle.lines, file_path)
if args.site == "youku":
subtitles = youku(args)
if args.site == "sohu":
subtitles = sohu(args)
if args.site == "mgtv":
subtitles = mgtv(args)
if __name__ == "__main__":
# 打包 --> pyinstaller GetDanMu.spec

103
README.md
View File

@@ -1,2 +1,103 @@
# GetDanMu
转换/下载各类视频的弹幕
[转换/下载各类视频弹幕的工具][1]
项目主页https://github.com/xhlove/GetDanMu
## 网站支持
| Site | URL | 单集? | 合集? | 综艺合集? | 支持series? |
| :--: | :-- | :-----: | :-----: | :-----: | :-----: |
| **腾讯视频** | <https://v.qq.com/> |✓|✓| |✓|
| **爱奇艺** | <https://www.iqiyi.com/> |✓|✓|✓|✓|
| **优酷** | <https://v.youku.com/> |✓|✓|✓|✓|
| **搜狐视频** | <https://tv.sohu.com/> |✓|✓|||
| **芒果TV** | <https://www.mgtv.com/> |✓|✓|✓|✓|
# 使用示例
- 命令(建议)
> GetDanMu.exe -s mgtv -r 20,960 -series -u https://www.mgtv.com/b/334727/7452407.html
- 双击运行
> 提示逻辑有待完善
- 选项说明
> -f或--font 指定输出字幕字体,默认微软雅黑
> -fs或--font-size 指定输出字幕字体大小默认28
> -s或--site 使用非url方式下载需指定网站 支持的网站 -> qq iqiyi youku sohu mgtv
> -r或--range 指定弹幕的纵向范围 默认0到720请用逗号隔开
> -cid或--cid 下载cid对应视频的弹幕(腾讯、芒果视频合集)
> -vid或--vid 下载vid对应视频的弹幕支持同时多个vid需要用逗号隔开
> -aid或--aid 下载aid对应视频的弹幕爱奇艺合集
> -tvid或--tvid 下载tvid对应视频的弹幕支持同时多个tvid需要用逗号隔开
> -series或--series 尝试通过单集得到合集的全部弹幕 默认不使用
> -u或--url 下载视频链接所指向视频的弹幕
> -y或--y 覆盖原有弹幕而不提示 默认不使用
- 字体配置文件(可选)
新建名为`config.json`的文件,内容形式如下:
```json
{
"fonts_base_folder": "C:/Windows/Fonts",
"fonts": {
"微软雅黑":"msyh.ttc",
"微软雅黑粗体":"msyhbd.ttc",
"微软雅黑细体":"msyhl.ttc"
}
}
```
# 效果示意(字幕与视频不相关)
![potplayer截屏](http://puui.qpic.cn/vshpic/0/5TLOX3WbgjudEj61IxYZ4tAuf2lFwl-ynf4S5T4sXkdjS9cd_0/0)
[查看使用演示视频点我][2]
注意有背景音乐
演示是直接使用的python命令使用exe的话把python GetDanMu.py换成GetDanMu.exe即可
## 可能存在的问题
- 下载进度接近100%时暂时没有反应
这是因为在全部弹幕获取完后一次性处理所致,对于时间过长和弹幕过多的视频,处理耗时较多,属于正常现象。
- 命令组合未达到预期效果
当前的逻辑并不完善,如果出现这种现象请反馈给我。
# 更新日志
## 2020/2/8
- 爱奇艺bug修复
## 2020/2/7
- 完善说明
- 爱奇艺支持series选项并完善地区判断
- 增加字体配置文件,建立字体名称与实际字体文件的映射关系,用于预先设定,方便更准确计算弹幕的分布
- 增加自定义弹幕区间选项,即-r或--range命令
- README完善
## 2020/1/28
- 增加芒果TV的支持支持综艺合集、支持series命令
- 爱奇艺bug修复
## 2020/1/16
- 增加搜狐视频的支持(剧集)
- 改进输入提示(双击运行时)
- 腾讯支持-series设定
## 2020/1/11
- 增加优酷弹幕下载,支持合集,支持通过单集直接下载合集弹幕(暂时仅限优酷)
- 改进去重方式
- 优酷的视频id用vid指代若下载合集请使用连接或通过`-series`选项下载合集弹幕
- 加入下载进度显示,后续可能改进为单行刷新
## 2020/1/5
- 增加了通过链接下载爱奇艺视频弹幕的方法,支持综艺合集。
- 增加通过链接判断网站
[赞助点此][3]
[1]: https://blog.weimo.info/archives/431/
[2]: https://alime-customer-upload-cn-hangzhou.oss-cn-hangzhou.aliyuncs.com/customer-upload/1581073011183_8t14dpgg2bdc.mp4
[3]: https://afdian.net/@vvtoolbox_dev

View File

@@ -1,17 +1,16 @@
#!/usr/bin/env python
#!/usr/bin/env python3.7
# coding=utf-8
'''
# 作者: weimo
# 创建日期: 2020-01-04 13:05:23
# 上次编辑时间 : 2020-01-04 15:52:11
# 创建日期: 2020-01-04 19:14:46
# 上次编辑时间 : 2020-02-07 19:21:19
# 一个人的命运啊,当然要靠自我奋斗,但是...
'''
import os
import json
from basic.vars import fonts
ass_script = """[Script Info]
; Script generated by N
ScriptType: v4.00+
@@ -30,15 +29,30 @@ ass_events_head = """[Events]\nFormat: Layer, Start, End, Style, Name, MarginL,
# 基于当前时间范围在0~1000ms之间停留在(676.571,506.629)处在1000~3000ms内从位置1300,600移动到360,600原点在左上
# ass_baseline = """Dialogue: 0,0:20:08.00,0:20:28.00,Default,,0,0,0,,{\t(1000,3000,\move(1300,600,360,600))\pos(676.571,506.629)}这是字幕内容示意"""
def get_fonts_info():
global fonts
fonts_path = r"C:\Windows\Fonts"
if os.path.exists("config.json"):
with open("config.json", "r", encoding="utf-8") as f:
fr = f.read()
try:
config = json.loads(fr)
except Exception as e:
print("get_fonts_info error info ->", e)
else:
fonts_path = config["fonts_base_folder"]
fonts = config["fonts"]
return fonts_path, fonts
def get_ass_head(font_style_name, font_size):
ass_head = ass_script + "\n\n" + ass_style_head + "\n" + ass_style_base.format(font=font_style_name, font_size=font_size) + "\n\n" + ass_events_head
return ass_head
def check_font(font):
win_font_path = r"C:\Windows\Fonts"
maybe_font_path = os.path.join(win_font_path, font)
fonts_path, fonts = get_fonts_info()
maybe_font_path = os.path.join(fonts_path, font)
font_style_name = "微软雅黑"
font_path = os.path.join(win_font_path, fonts[font_style_name]) # 默认
font_path = os.path.join(fonts_path, fonts[font_style_name]) # 默认
if os.path.exists(font):
# 字体就在当前文件夹 或 完整路径
if os.path.isfile(font):
@@ -58,14 +72,8 @@ def check_font(font):
pass
elif fonts.get(font):
# 别名映射
font_path = os.path.join(win_font_path, fonts.get(font))
font_path = os.path.join(fonts_path, fonts.get(font))
font_style_name = font
else:
pass
return font_path, font_style_name
def check_content(content: str, comments: list):
content = content.replace(" ", "")
if content in comments:
return
return content
return font_path, font_style_name

View File

@@ -1,17 +1,23 @@
#!/usr/bin/env python
#!/usr/bin/env python3.7
# coding=utf-8
'''
# 作者: weimo
# 创建日期: 2020-01-04 13:16:18
# 上次编辑时间 : 2020-01-04 16:08:34
# 创建日期: 2020-01-04 19:14:35
# 上次编辑时间 : 2020-02-07 17:57:05
# 一个人的命运啊,当然要靠自我奋斗,但是...
'''
ALLOW_SITES = ["qq", "iqiyi", "youku", "sohu", "mgtv"]
qqlive = {
"User-Agent":"qqlive"
}
iqiyiplayer = {
"User-Agent":"Qiyi List Client PC 7.2.102.1343"
}
chrome = {
"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36"
}
fonts = {
"微软雅黑":"msyh.ttc",
"微软雅黑粗体":"msyhbd.ttc",

8
config.json Normal file
View File

@@ -0,0 +1,8 @@
{
"fonts_base_folder": "C:/Windows/Fonts",
"fonts": {
"微软雅黑":"msyh.ttc",
"微软雅黑粗体":"msyhbd.ttc",
"微软雅黑细体":"msyhl.ttc"
}
}

View File

@@ -1,9 +1,9 @@
#!/usr/bin/env python
#!/usr/bin/env python3.7
# coding=utf-8
'''
# 作者: weimo
# 创建日期: 2020-01-04 13:01:04
# 上次编辑时间 : 2020-01-04 15:42:02
# 创建日期: 2020-01-04 19:14:32
# 上次编辑时间: 2020-01-05 14:46:27
# 一个人的命运啊,当然要靠自我奋斗,但是...
'''

View File

@@ -1,22 +1,23 @@
#!/usr/bin/env python
#!/usr/bin/env python3.7
# coding=utf-8
'''
# 作者: weimo
# 创建日期: 2019-12-25 20:35:43
# 上次编辑时间 : 2019-12-25 23:23:32
# 创建日期: 2020-01-04 19:14:47
# 上次编辑时间 : 2020-02-07 18:40:42
# 一个人的命运啊,当然要靠自我奋斗,但是...
'''
from PIL.ImageFont import truetype
class SameHeight(object):
'''
# 等高弹幕 --> 矩形分割问题?
'''
def __init__(self, text, font_path="msyh.ttc", font_size=14):
def __init__(self, text, ass_range: str, font_path="msyh.ttc", font_size=14):
self.font = truetype(font=font_path, size=font_size)
self.width, self.height = self.get_danmu_size(text)
self.height_range = [0, 720]
self.height_range = [int(n.strip()) for n in ass_range.split(",")]
self.width_range = [0, 1920]
self.lines_start_y = list(range(*(self.height_range + [self.height])))
self.lines_width_used = [[y, 0] for y in self.lines_start_y]
@@ -48,7 +49,7 @@ class SameHeight(object):
def main():
text = "测试"
show_time = 13
sh = SameHeight(text)
sh = SameHeight(text, "0,720")
sh.get_xy(text, show_time)

50
pfunc/cfunc.py Normal file
View File

@@ -0,0 +1,50 @@
#!/usr/bin/env python3.7
# coding=utf-8
'''
# 作者: weimo
# 创建日期: 2020-01-05 12:45:18
# 上次编辑时间 : 2020-01-16 14:50:34
# 一个人的命运啊,当然要靠自我奋斗,但是...
'''
import hashlib
from urllib.parse import urlparse
from basic.vars import ALLOW_SITES
def remove_same_danmu(comments: list):
    """Drop duplicate danmu entries, keeping the first occurrence.

    Each item of *comments* is [content, color, timepoint].  Spaces are
    stripped from the content before comparison, and the stripped content
    is what gets returned (it is what the .ass writer renders).

    Popping from the list while iterating would shift indexes, so a new
    list is built instead.
    """
    seen = set()   # stripped contents already emitted
    uniques = []
    for content, color, timepoint in comments:
        content = content.replace(" ", "")
        # BUG FIX: the original appended [content, color, timepoint] to the
        # very list it used for the membership test, so a str was compared
        # against lists and no duplicate was ever removed.
        if content in seen:
            continue
        seen.add(content)
        uniques.append([content, color, timepoint])
    return uniques
def check_url_site(url):
    """Return the supported site keyword embedded in *url*'s host, else None."""
    # The second-to-last host label names the site, e.g. "v.qq.com" -> "qq".
    host_parts = urlparse(url).netloc.split(".")
    candidate = host_parts[-2]
    return candidate if candidate in ALLOW_SITES else None
def check_url_locale(url):
    """Map *url*'s host prefix to an iqiyi locale code: "zh_tw" for tw.*, else "zh_cn"."""
    locales = {"cn": "zh_cn", "tw": "zh_tw", "intl": "intl"}
    subdomain = urlparse(url).netloc.split(".")[0]
    return locales["tw" if subdomain == "tw" else "cn"]
def yk_msg_sign(msg: str):
    """MD5-sign a Youku danmu message payload with the fixed app secret."""
    salted = (msg + "MkmC9SoIw6xCkSKHhJ7b5D2r51kBiREr").encode("utf-8")
    return hashlib.md5(salted).hexdigest()
def yk_t_sign(token, t, appkey, data):
    """MD5 of "token&t&appkey&data" — Youku's request-signature scheme."""
    joined = "&".join((token, t, appkey, data))
    return hashlib.md5(joined.encode("utf-8")).hexdigest()

View File

@@ -1,26 +1,46 @@
#!/usr/bin/env python
#!/usr/bin/env python3.7
# coding=utf-8
'''
# 作者: weimo
# 创建日期: 2020-01-04 19:17:44
# 上次编辑时间 : 2020-01-04 19:30:24
# 上次编辑时间 : 2020-02-07 18:17:48
# 一个人的命运啊,当然要靠自我奋斗,但是...
'''
import os
from basic.ass import get_ass_head, check_font
from methods.assbase import ASS
from methods.sameheight import SameHeight
from pfunc.cfunc import remove_same_danmu
def write_one_video_subtitles(file_path, comments, args):
# 对于合集则每次都都得检查一次 也可以放在上一级 放在这里 考虑后面可能特殊指定字体的情况
font_path, font_style_name = check_font(args.font)
ass_head = get_ass_head(font_style_name, args.font_size)
get_xy_obj = SameHeight("那就写这一句作为初始化测试吧!", args.range, font_path=font_path, font_size=int(args.font_size))
subtitle = ASS(file_path, get_xy_obj, font=font_style_name)
comments = remove_same_danmu(comments)
for comment in comments:
subtitle.create_new_line(comment)
write_lines_to_file(ass_head, subtitle.lines, file_path)
return comments
def write_lines_to_file(ass_head, lines, file_path):
with open(file_path, "a+", encoding="utf-8") as f:
f.write(ass_head + "\n")
for line in lines:
f.write(line + "\n")
def check_file(name, skip=False, fpath=os.getcwd()):
def check_file(name, args, fpath=os.getcwd()):
flag = True
file_path = os.path.join(fpath, name + ".ass")
if os.path.isfile(file_path):
if skip:
if args.y:
os.remove(file_path)
elif args.series:
# 存在重复的 那么直接pass认为已经下载好了
flag = False
return flag, file_path
else:
isremove = input("{}已存在,是否覆盖?(y/n)".format(file_path))
if isremove.strip() == "y":

View File

@@ -1,17 +1,18 @@
#!/usr/bin/env python
#!/usr/bin/env python3.7
# coding=utf-8
'''
# 作者: weimo
# 创建日期: 2020-01-04 13:15:25
# 上次编辑时间 : 2020-01-04 17:47:16
# 创建日期: 2020-01-04 19:14:43
# 上次编辑时间 : 2020-02-08 21:37:26
# 一个人的命运啊,当然要靠自我奋斗,但是...
'''
import re
import json
import requests
from basic.vars import qqlive, iqiyiplayer
from time import localtime
from pfunc.cfunc import check_url_locale
from basic.vars import qqlive, iqiyiplayer, chrome
# 放一些仅通过某个id获取另一个/多个id的方法
@@ -26,7 +27,7 @@ def get_danmu_target_id_by_vid(vid: str):
try:
r = requests.get(api_url, params=params, headers=qqlive).content.decode("utf-8")
except Exception as e:
print("error info -->", e)
print("target_id requests error info -->", e)
return None
data = json.loads(r.lstrip("QZOutputJson=").rstrip(";"))
target_id = None
@@ -39,6 +40,28 @@ def get_all_vids_by_column_id():
# 综艺类型的
pass
def get_cid_by_vid(vid):
api_url = "http://union.video.qq.com/fcgi-bin/data"
params = {
"tid": "98",
"appid": "10001005",
"appkey": "0d1a9ddd94de871b",
"idlist": vid,
"otype":"json"
}
r = requests.get(api_url, params=params, headers=qqlive).content.decode("utf-8")
data = json.loads(r.lstrip("QZOutputJson=").rstrip(";"))
try:
cid = data["results"][0]["fields"]
except Exception as e:
print("load fields error info -->", e)
return None
if cid.get("sync_cover"):
return cid["sync_cover"]
elif cid.get("cover_list"):
return cid["cover_list"][0]
return
def get_all_vids_by_cid(cid):
api_url = "http://union.video.qq.com/fcgi-bin/data"
params = {
@@ -53,7 +76,7 @@ def get_all_vids_by_cid(cid):
try:
nomal_ids = json.loads(data["results"][0]["fields"]["nomal_ids"])
except Exception as e:
print("error info -->", e)
print("load nomal_ids error info -->", e)
return None
# F 2是免费 7是会员 0是最新正片之前的预告 4是正片之后的预告
vids = [item["V"] for item in nomal_ids if item["F"] in [2, 7]]
@@ -63,23 +86,215 @@ def get_all_vids_by_cid(cid):
#-------------------------------------------iqiyi--------------------------------------------
def get_vinfos(aid):
def get_vinfos(aid, locale="zh_cn"):
api_url = "http://cache.video.iqiyi.com/avlist/{}/0/".format(aid)
if locale != "zh_cn":
api_url += "?locale=" + locale
try:
r = requests.get(api_url, headers=iqiyiplayer).content.decode("utf-8")
r = requests.get(api_url, headers=chrome, timeout=5).content.decode("utf-8")
except Exception as e:
print("error info -->", e)
print("get_vinfos requests error info -->", e)
return None
data = json.loads(r[len("var videoListC="):])
try:
vlist = data["data"]["vlist"]
except Exception as e:
print("error info -->", e)
print("get_vinfos load vlist error info -->", e)
return None
vinfos = [[v["shortTitle"] + "_" + str(v["timeLength"]), v["timeLength"], ["id"]] for v in vlist]
vinfos = [[v["shortTitle"] + "_" + str(v["timeLength"]), v["timeLength"], v["id"]] for v in vlist]
return vinfos
def get_vinfos_by_url(url):
pass
def matchit(patterns, text):
    """Return group(1) of the first pattern in *patterns* that matches *text*, else None."""
    for pattern in patterns:
        hit = re.match(pattern, text)
        if hit is not None:
            return hit.group(1)
    return None
#-------------------------------------------iqiyi--------------------------------------------
def duration_to_sec(duration: str):
    """Convert "hh:mm:ss", "mm:ss" or bare "ss" to a total number of seconds."""
    parts = duration.split(":")
    # Take the last len(parts) weights so "mm:ss" pairs with [60, 1], etc.
    weights = [3600, 60, 1][-len(parts):]
    return sum(w * int(p) for w, p in zip(weights, parts))
def get_year_range(aid, locale="zh_cn"):
    # Fetch the years of the album's first and latest videos and return the
    # inclusive list of years in between; on any request error fall back to
    # a single-element list holding the current year.
    year_start = year_end = localtime().tm_year
    api_url = "http://pcw-api.iqiyi.com/album/album/baseinfo/{}".format(aid)
    if locale != "zh_cn":
        # Non-mainland catalogs require an explicit locale query parameter.
        api_url += "?locale=" + locale
    try:
        r = requests.get(api_url, headers=chrome, timeout=5).content.decode("utf-8")
    except Exception as e:
        print("error info -->", e)
        return list(range(year_start, year_end + 1))
    data = json.loads(r)["data"]
    # "period" presumably starts with the 4-digit year — TODO confirm format.
    if data.get("firstVideo"):
        year_start = int(data["firstVideo"]["period"][:4])
    if data.get("latestVideo"):
        year_end = int(data["latestVideo"]["period"][:4])
    return list(range(year_start, year_end + 1))
def get_vinfo_by_tvid(tvid, locale="zh_cn", isall=False):
    # Fetch base info for one iqiyi episode (tvid).  With isall=True, resolve
    # the parent album id and return the whole album's episode list instead;
    # otherwise return [[name_duration, durationSec, tvid]].
    api_url = "https://pcw-api.iqiyi.com/video/video/baseinfo/{}".format(tvid)
    if locale != "zh_cn":
        api_url += "?locale=" + locale
    try:
        r = requests.get(api_url, headers=chrome, timeout=5).content.decode("utf-8")
    except Exception as e:
        print("error info -->", e)
        return
    data = json.loads(r)["data"]
    if data.__class__ != dict:
        # Guard: payload apparently may be non-dict for invalid ids — TODO confirm.
        return None
    if isall:
        aid = data.get("albumId")
        if aid is None:
            print("通过单集tvid获取合集aid失败将只下载单集的弹幕")
        # NOTE(review): even when aid is None the code still calls
        # get_vinfos(None) below instead of falling back to the
        # single-episode path promised by the message above — confirm.
        locale = check_video_area_by_tvid(tvid)
        if locale is None:
            locale = "zh_cn"
        return get_vinfos(aid, locale=locale)
    name = data["name"]
    duration = data["durationSec"]
    return [[name + "_" + str(duration), duration, tvid]]
def check_video_area_by_tvid(tvid):
    """Decide the locale ("zh_cn"/"zh_tw") to use for *tvid*; None on request failure."""
    api_url = "https://pcw-api.iqiyi.com/video/video/playervideoinfo?tvid={}".format(tvid)
    try:
        raw = requests.get(api_url, headers=chrome, timeout=5).content.decode("utf-8")
    except Exception as e:
        print("check_video_area_by_tvid error info -->", e)
        return None
    data = json.loads(raw)["data"]
    is_international = data["operation_base"]["is_international"]
    languages = [entry["language"].lower() for entry in data["operation_language_base"]]
    # Non-international titles that list zh_tw are served by the Taiwan catalog.
    if is_international is False and "zh_tw" in languages:
        return "zh_tw"
    return "zh_cn"
def get_vinfos_by_year(aid, years: list, cid=6, locale="zh_cn"):
    """Collect [name, seconds, tvId] for every episode of album *aid* published
    in the given *years* (variety-show listing endpoint).  None on request failure."""
    timelist = ",".join(str(y) for y in years)
    api_url = "https://pcw-api.iqiyi.com/album/source/svlistinfo?cid={}&sourceid={}&timelist={}".format(cid, aid, timelist)
    if locale != "zh_cn":
        api_url += "&locale=" + locale
    try:
        raw = requests.get(api_url, headers=chrome, timeout=5).content.decode("utf-8")
    except Exception as e:
        print("get_vinfos_by_year error info -->", e)
        return None
    data = json.loads(raw)["data"]
    vinfos = []
    for year in years:
        # The response is keyed by year as a string.
        episodes = data.get(str(year))
        if episodes is None:
            continue
        for ep in episodes:
            seconds = duration_to_sec(ep["duration"])
            vinfos.append([ep["shortTitle"] + "_" + str(seconds), seconds, ep["tvId"]])
    return vinfos
def get_vinfos_by_url(url, isall=False):
    """Resolve an iqiyi page URL into a list of [name, seconds, tvid] entries.

    Recognized URL shapes: w_/v_ single-episode pages, a_ album pages and
    lib/m_ media-library pages.  Returns None when the URL is unrecognized
    or the page request fails.
    """
    locale = check_url_locale(url)
    patterns = [".+?/w_(\w+?).html", ".+?/v_(\w+?).html", ".+?/a_(\w+?).html", ".+?/lib/m_(\w+?).html"]
    isw, isep, isas, isms = [re.match(pattern, url) for pattern in patterns]
    if isw is None and isep is None and isas is None and isms is None:
        return None
    try:
        r = requests.get(url, headers=chrome, timeout=5).content.decode("utf-8")
    except Exception as e:
        print("get_vinfos_by_url error info -->", e)
        return None
    # Scrape channel id / album id / tv id out of the page's inline scripts.
    cid_patterns = ["[\s\S]+?\.cid.+?(\d+)", "[\s\S]+?cid: \"(\d+)\"", "[\s\S]+?channelID.+?\"(\d+)\""]
    cid = matchit(cid_patterns, r)
    aid_patterns = ["[\s\S]+?aid:'(\d+)'", "[\s\S]+?albumid=\"(\d+)\"", "[\s\S]+?movlibalbumaid=\"(\d+)\"", "[\s\S]+?data-score-tvid=\"(\d+)\""]
    aid = matchit(aid_patterns, r)
    tvid_patterns = ["[\s\S]+?\"tvid\":\"(\d+)\"", "[\s\S]+?\['tvid'\].+?\"(\d+)\""]
    tvid = matchit(tvid_patterns, r)
    years = None
    if cid is None:
        cid = ""
    elif cid == "6" and (isas or isms):
        # BUG FIX: was `cid == "6" and isas or isms`, which — by Python's
        # operator precedence — fetched the year range for ANY lib/m_ page
        # even when the channel is not a variety show (cid != 6).
        # Variety-show (cid 6) collections are listed per publication year.
        years = get_year_range(aid, locale=locale)
    if isep or isw:
        if tvid is None:
            return
        return get_vinfo_by_tvid(tvid, locale=locale, isall=isall)
    if isas or isms:
        if aid is None:
            return
        if cid == "6":
            # years is guaranteed set here: cid == "6" with an album/library
            # page is exactly the elif branch above.
            return get_vinfos_by_year(aid, years, locale=locale)
        else:
            return get_vinfos(aid, locale=locale)
#-------------------------------------------iqiyi--------------------------------------------
#-------------------------------------------youku--------------------------------------------
def get_vinfos_by_url_youku(url, isall=False):
    """Resolve a Youku page URL to video infos via its video id or show id.

    Returns None when no known id pattern is found in the URL.
    """
    # BUG FIX: two patterns used "(/+?)" (one or more literal slashes) where
    # "(.+?)" was clearly intended, so those URL forms could never match.
    vid_patterns = ["[\s\S]+?youku.com/video/id_(.+?)\.html", "[\s\S]+?youku.com/v_show/id_(.+?)\.html"]
    video_id = matchit(vid_patterns, url)
    show_id_patterns = ["[\s\S]+?youku.com/v_nextstage/id_(.+?)\.html", "[\s\S]+?youku.com/show/id_z(.+?)\.html", "[\s\S]+?youku.com/show_page/id_z(.+?)\.html", "[\s\S]+?youku.com/alipay_video/id_(.+?)\.html"]
    show_id = matchit(show_id_patterns, url)
    if video_id is None and show_id is None:
        return None
    if video_id:
        return get_vinfos_by_video_id(video_id, isall=isall)
    # A 20-char all-lowercase id is presumably a real show id; anything else
    # (e.g. from alipay_video pages) is treated as a video id — TODO confirm.
    if len(show_id) == 20 and show_id == show_id.lower():
        return get_vinfos_by_show_id(show_id)
    else:
        return get_vinfos_by_video_id(show_id, isall=isall)
def get_vinfos_by_video_id(video_id, isall=False):
    # Look up one Youku video via the open API; with isall=True jump to the
    # whole show's episode list instead.  Returns a list of
    # [name, seconds, video_id] (single-element in the non-isall case).
    api_url = "https://openapi.youku.com/v2/videos/show.json?client_id=53e6cc67237fc59a&package=com.huawei.hwvplayer.youku&ext=show&video_id={}".format(video_id)
    try:
        r = requests.get(api_url, headers=chrome, timeout=5).content.decode("utf-8")
    except Exception as e:
        print("get_vinfos_by_video_id error info -->", e)
        return None
    data = json.loads(r)
    if isall:
        # NOTE(review): raises KeyError if the response lacks "show" —
        # confirm the API always includes it for series episodes.
        show_id = data["show"]["id"]
        return get_vinfos_by_show_id(show_id)
    duration = 0
    if data.get("duration"):
        # "duration" arrives as a string like "1234.56"; truncate to seconds.
        duration = int(float(data["duration"]))
    if data.get("title"):
        name = data["title"] + "_" + str(duration)
    else:
        name = "优酷未知" + "_" + str(duration)
    vinfo = [name, duration, video_id]
    return [vinfo]
def get_vinfos_by_show_id(show_id):
    """Fetch up to 100 main-feature episodes of a Youku show as [name, seconds, video_id]."""
    api_url = "https://openapi.youku.com/v2/shows/videos.json?show_videotype=正片&count=100&client_id=53e6cc67237fc59a&page=1&show_id={}&package=com.huawei.hwvplayer.youku".format(show_id)
    try:
        raw = requests.get(api_url, headers=chrome, timeout=5).content.decode("utf-8")
    except Exception as e:
        print("get_vinfos_by_show_id error info -->", e)
        return None
    videos = json.loads(raw)["videos"]
    if len(videos) == 0:
        return None
    vinfos = []
    for video in videos:
        duration = int(float(video["duration"])) if video.get("duration") else 0
        if video.get("title"):
            name = video["title"] + "_" + str(duration)
        else:
            name = "优酷未知_{}".format(video["id"]) + "_" + str(duration)
        vinfos.append([name, duration, video["id"]])
    return vinfos
#-------------------------------------------youku--------------------------------------------

3
requirements.txt Normal file
View File

@@ -0,0 +1,3 @@
requests==2.22.0
Pillow==7.0.0
xmltodict==0.12.0

View File

@@ -1,9 +1,9 @@
#!/usr/bin/env python
#!/usr/bin/env python3.7
# coding=utf-8
'''
# 作者: weimo
# 创建日期: 2019-12-18 09:48:36
# 上次编辑时间 : 2020-01-04 17:54:46
# 创建日期: 2020-01-04 19:14:41
# 上次编辑时间 : 2020-02-08 21:37:36
# 一个人的命运啊,当然要靠自我奋斗,但是...
'''
@@ -14,25 +14,10 @@ from zlib import decompress
from xmltodict import parse
from basic.vars import iqiyiplayer
from basic.ass import check_content
from pfunc.dump_to_ass import check_file
from pfunc.request_info import get_vinfos
from pfunc.dump_to_ass import check_file, write_one_video_subtitles
from pfunc.request_info import get_vinfos, get_vinfos_by_url, get_vinfo_by_tvid
def get_vinfo_by_tvid(tvid):
api_url = "https://pcw-api.iqiyi.com/video/video/baseinfo/{}".format(tvid)
try:
r = requests.get(api_url, headers=iqiyiplayer).content.decode("utf-8")
except Exception as e:
print("error info -->", e)
return
data = json.loads(r)["data"]
if data.__class__ != dict:
return None
name = data["name"]
duration = data["durationSec"]
return [name + "_" + str(duration), duration, tvid]
def get_danmu_by_tvid(name, duration, tvid):
# http://cmts.iqiyi.com/bullet/41/00/10793494100_300_3.z
if tvid.__class__ == int:
@@ -50,7 +35,11 @@ def get_danmu_by_tvid(name, duration, tvid):
except Exception as e:
print("error info -->", e)
continue
raw_xml = decompress(bytearray(r), 15+32).decode('utf-8')
try:
raw_xml = decompress(bytearray(r), 15+32).decode('utf-8')
except Exception as e:
index += 1
continue
try:
entry = parse(raw_xml)["danmu"]["data"]["entry"]
except Exception as e:
@@ -58,21 +47,17 @@ def get_danmu_by_tvid(name, duration, tvid):
continue
# with open("raw_xml.json", "w", encoding="utf-8") as f:
# f.write(json.dumps(parse(raw_xml), ensure_ascii=False, indent=4))
contents = []
if entry.__class__ != list:
entry = [entry]
for comment in entry:
if comment.get("list") is None:
continue
bulletInfo = comment["list"]["bulletInfo"]
if bulletInfo.__class__ != list:
bulletInfo = [bulletInfo]
for info in bulletInfo:
content = check_content(info["content"], contents)
if content is None:
continue
else:
contents.append(content)
color = [info["color"]]
comments.append([content, color, int(comment["int"])])
comments.append([info["content"], color, int(comment["int"])])
print("已下载{:.2f}%".format(index * timestamp * 100 / duration))
index += 1
comments = sorted(comments, key=lambda _: _[-1])
@@ -81,25 +66,31 @@ def get_danmu_by_tvid(name, duration, tvid):
def main(args):
vinfos = []
isall = False
if args.series:
isall = True
if args.tvid:
vi = get_vinfo_by_tvid(args.tvid)
vi = get_vinfo_by_tvid(args.tvid, isall=isall)
if vi:
vinfos.append(vi)
if args.aid:
vi = get_vinfos(args.aid)
if vi:
vinfos += vi
# if args.url:
# vi = get_vinfos_by_url(args.url)
# if vi:
# vinfos += vi
if args.tvid == "" and args.aid == "" and args.url == "":
args.url = input("请输入iqiyi链接\n")
if args.url:
vi = get_vinfos_by_url(args.url, isall=isall)
if vi:
vinfos += vi
subtitles = {}
for name, duration, tvid in vinfos:
print(name, "开始下载...")
flag, file_path = check_file(name, skip=args.y)
flag, file_path = check_file(name, args)
if flag is False:
print("跳过{}".format(name))
return
continue
comments = get_danmu_by_tvid(name, duration, tvid)
comments = write_one_video_subtitles(file_path, comments, args)
subtitles.update({file_path:comments})
return subtitles

206
sites/mgtv.py Normal file
View File

@@ -0,0 +1,206 @@
#!/usr/bin/env python3.7
# coding=utf-8
'''
# 作者: weimo
# 创建日期: 2020-01-28 15:55:22
# 上次编辑时间 : 2020-02-07 18:32:05
# 一个人的命运啊,当然要靠自我奋斗,但是...
'''
import re
import json
import time
import base64
import requests
from uuid import uuid4
from collections import OrderedDict
from basic.vars import chrome
from pfunc.request_info import duration_to_sec
from pfunc.dump_to_ass import check_file, write_one_video_subtitles
pno_params = {
"pad":"1121",
"ipad":"1030"
}
type_params = {
"h5flash":"h5flash",
"padh5":"padh5",
"pch5":"pch5"
}
def get_danmu_by_vid(vid: str, cid: str, duration: int):
    """Download all danmu for one mgtv video.

    The endpoint serves one 60-second segment per request; *duration* (in
    seconds) bounds how many segments are fetched.  Returns a list of
    [content, [color], seconds] items (the API exposes no color, so white
    "ffffff" is used throughout).
    """
    api_url = "https://galaxy.bz.mgtv.com/rdbarrage"
    params = OrderedDict({
        "version": "2.0.0",
        "vid": vid,
        "abroad": "0",
        "pid": "",
        "os": "",
        "uuid": "",
        "deviceid": "",
        "cid": cid,
        "ticket": "",
        "time": "0",
        "mac": "",
        "platform": "0",
        "callback": ""
    })
    comments = []
    index = 0
    max_index = duration // 60 + 1
    attempts = 0  # failed tries for the current segment
    while index < max_index:
        params["time"] = str(index * 60 * 1000)
        try:
            r = requests.get(api_url, params=params, headers=chrome, timeout=3).content.decode("utf-8")
        except Exception as e:
            # BUG FIX: the original retried the same segment forever on a
            # network error; allow three attempts per segment, then skip it.
            attempts += 1
            if attempts >= 3:
                attempts = 0
                index += 1
            continue
        attempts = 0
        items = json.loads(r)["data"]["items"]
        index += 1
        if items is None:
            # Segment with no danmu.
            continue
        for item in items:
            # item["time"] is in milliseconds.
            comments.append([item["content"], ["ffffff"], int(item["time"] / 1000)])
        print("已下载{:.2f}%".format(index / max_index * 100))
    return comments
def get_tk2(did):
    """Build mgtv's "tk2" request token: base64 of a pipe-joined descriptor
    with URL-unsafe characters substituted, then reversed."""
    descriptor = "did={}|pno={}|ver=0.3.0301|clit={}".format(did, pno_params["ipad"], int(time.time()))
    encoded = base64.b64encode(descriptor.encode("utf-8")).decode("utf-8")
    for src, dst in (("+", "_"), ("/", "~"), ("=", "-")):
        encoded = encoded.replace(src, dst)
    return encoded[::-1]
def get_vinfos_by_cid_or_vid(xid: str, flag="vid"):
    # Page through an mgtv episode list, keyed either by a video id
    # (flag="vid") or by a collection id (flag="cid").  Returns
    # [name, seconds, video_id, clip_id] per episode, 25 episodes per page.
    api_url = "https://pcweb.api.mgtv.com/episode/list"
    params = {
        "video_id": xid,
        "page": "0",
        "size": "25",
        "cxid": "",
        "version": "5.5.35",
        "callback": "",
        "_support": "10000000",
        "_": str(int(time.time() * 1000))
    }
    if flag == "cid":
        # Collections use a different query key.
        _ = params.pop("video_id")
        params["collection_id"] = xid
    page = 1
    vinfos = []
    while True:
        params["page"] = page
        try:
            r = requests.get(api_url, params=params, headers=chrome, timeout=3).content.decode("utf-8")
        except Exception as e:
            # NOTE(review): retries the same page forever on persistent
            # network failure — consider bounding the retries.
            continue
        data = json.loads(r)["data"]
        for ep in data["list"]:
            if re.match("\d\d\d\d-\d\d-\d\d", ep["t4"]):
                # Variety shows: prefix the air date so names stay unique.
                name = "{t4}_{t3}_{t2}".format(**ep).replace(" ", "")
            else:
                name = "{t3}_{t2}".format(**ep).replace(" ", "")
            duration = duration_to_sec(ep["time"])
            vinfos.append([name, duration, ep["video_id"], ep["clip_id"]])
        if page < data["count"] // 25 + 1:
            page += 1
        else:
            break
    return vinfos
def get_vinfo_by_vid(vid: str):
    """Fetch one mgtv video's info; returns [name, seconds, vid, collection_id] or None."""
    api_url = "https://pcweb.api.mgtv.com/player/video"
    did = str(uuid4())
    params = OrderedDict({
        "did": did,
        "suuid": str(uuid4()),
        "cxid": "",
        "tk2": get_tk2(did),
        "video_id": vid,
        "type": type_params["pch5"],
        "_support": "10000000",
        "auth_mode": "1",
        "callback": ""
    })
    try:
        raw = requests.get(api_url, params=params, headers=chrome, timeout=3).content.decode("utf-8")
    except Exception as e:
        return
    info = json.loads(raw)["data"]["info"]
    name = "{title}_{series}_{desc}".format(**info).replace(" ", "")
    return [name, int(info["duration"]), vid, info["collection_id"]]
def get_vinfos_by_url(url: str, isall: bool):
    """Parse an mgtv page URL and return the matching episode info list (or None)."""
    vinfos = []
    # e.g. https://www.mgtv.com/b/323323/4458375.html -> (clip id, video id)
    ids = re.match("[\s\S]+?mgtv.com/b/(\d+)/(\d+)\.html", url)
    # e.g. https://www.mgtv.com/h/333999.html?fpa=se  -> collection id
    cid_v1 = re.match("[\s\S]+?mgtv.com/h/(\d+)\.html", url)
    # e.g. https://m.mgtv.com/h/333999/0.html         -> collection id (mobile)
    cid_v2 = re.match("[\s\S]+?mgtv.com/h/(\d+)/\d\.html", url)
    if ids is None and cid_v1 is None and cid_v2 is None:
        return
    if ids is not None and len(ids.groups()) == 2:
        cid, vid = ids.groups()
        if isall:
            episode_infos = get_vinfos_by_cid_or_vid(vid)
            if episode_infos:
                vinfos += episode_infos
        else:
            vinfo = get_vinfo_by_vid(vid)
            if vinfo is None:
                return
            vinfos.append(vinfo)
    if cid_v1 or cid_v2:
        # Prefer the mobile-style match when both somehow apply.
        collection_id = cid_v2.group(1) if cid_v2 is not None else cid_v1.group(1)
        episode_infos = get_vinfos_by_cid_or_vid(collection_id, flag="cid")
        if episode_infos:
            vinfos += episode_infos
    return vinfos
def main(args):
    """mgtv entry point: resolve episodes from args (url / vid / cid), then
    download and write each episode's danmu.

    Returns {file_path: comments} for the episodes actually downloaded.
    """
    vinfos = []
    isall = bool(args.series)
    if args.url:
        vi = get_vinfos_by_url(args.url, isall)
        if vi:
            vinfos += vi
    if args.vid:
        if isall:
            # Expand a single vid into its whole collection.
            vi = get_vinfos_by_cid_or_vid(args.vid)
            if vi:
                vinfos += vi
        else:
            vi = get_vinfo_by_vid(args.vid)
            if vi:
                vinfos.append(vi)
    if args.cid:
        # BUG FIX: a -cid value is a collection id, so it must be queried as
        # collection_id (flag="cid"); the original sent it as video_id.
        vi = get_vinfos_by_cid_or_vid(args.cid, flag="cid")
        if vi:
            vinfos += vi
    subtitles = {}
    for name, duration, vid, cid in vinfos:
        print(name, "开始下载...")
        flag, file_path = check_file(name, args)
        if flag is False:
            print("跳过{}".format(name))
            continue
        comments = get_danmu_by_vid(vid, cid, duration)
        write_one_video_subtitles(file_path, comments, args)
        subtitles.update({file_path: comments})
        print(name, "下载完成!")
    return subtitles
if __name__ == "__main__":
    # Manual smoke test.  BUG FIX: a bare object() rejects attribute
    # assignment, so the old code crashed before reaching main(); use a
    # SimpleNamespace carrying every attribute main() reads.
    from types import SimpleNamespace
    args = SimpleNamespace(url="https://www.mgtv.com/h/333999.html?fpa=se", vid="", cid="", series=False)
    main(args)

View File

@@ -1,9 +1,9 @@
#!/usr/bin/env python
#!/usr/bin/env python3.7
# coding=utf-8
'''
# 作者: weimo
# 创建日期: 2019-12-18 09:37:15
# 上次编辑时间 : 2020-01-04 17:53:28
# 创建日期: 2020-01-04 19:14:37
# 上次编辑时间 : 2020-01-16 20:04:51
# 一个人的命运啊,当然要靠自我奋斗,但是...
'''
@@ -13,8 +13,8 @@ import json
import requests
from basic.vars import qqlive
from basic.ass import check_content
from pfunc.dump_to_ass import check_file
from pfunc.dump_to_ass import check_file, write_one_video_subtitles
from pfunc.request_info import get_cid_by_vid
from pfunc.request_info import get_all_vids_by_cid as get_vids
from pfunc.request_info import get_danmu_target_id_by_vid as get_target_id
@@ -80,13 +80,7 @@ def get_danmu_by_target_id(vid: str, duration: int, target_id, font="微软雅
# timestamp不变 再试一次
continue
danmu_count = danmu["count"]
contents = []
for comment in danmu["comments"]:
content = check_content(comment["content"], contents)
if content is None:
continue
else:
contents.append(content)
if comment["content_style"]:
style = json.loads(comment["content_style"])
if style.get("gradient_colors"):
@@ -97,17 +91,17 @@ def get_danmu_by_target_id(vid: str, duration: int, target_id, font="微软雅
color = ["ffffff"]
else:
color = ["ffffff"]
comments.append([content, color, comment["timepoint"]])
comments.append([comment["content"], color, comment["timepoint"]])
print("已下载{:.2f}%".format(params["timestamp"]*100/duration))
params["timestamp"] += 30
comments = sorted(comments, key=lambda _: _[-1])
return comments
def get_one_subtitle_by_vinfo(vinfo, font="微软雅黑", font_size=25, skip=False):
def get_one_subtitle_by_vinfo(vinfo, font="微软雅黑", font_size=25, args=""):
vid, name, duration, target_id = vinfo
print(name, "开始下载...")
flag, file_path = check_file(name, skip=skip)
flag, file_path = check_file(name, args)
if flag is False:
print("跳过{}".format(name))
return
@@ -115,7 +109,7 @@ def get_one_subtitle_by_vinfo(vinfo, font="微软雅黑", font_size=25, skip=Fal
# print("{}弹幕下载完成!".format(name))
return comments, file_path
def ask_input(url=""):
def ask_input(url="", isall=False):
if url == "":
url = input("请输入vid/coverid/链接输入q退出\n").strip()
if url == "q" or url == "":
@@ -124,6 +118,9 @@ def ask_input(url=""):
params = url.replace(".html", "").split("/")
if params[-1].__len__() == 11:
vids = [params[-1]]
if isall:
cid = get_cid_by_vid(params[-1])
vids += get_vids(cid)
elif params[-1].__len__() == 15:
cid = params[-1]
vids = get_vids(cid)
@@ -139,6 +136,9 @@ def ask_input(url=""):
def main(args):
vids = []
isall = False
if args.series:
isall = True
if args.cid and args.cid.__len__() == 15:
vids += get_vids(args.cid)
if args.vid:
@@ -148,16 +148,30 @@ def main(args):
vids += [vid for vid in args.vid.strip().replace(" ", "").split(",") if vid.__len__() == 11]
else:
pass
if args.series:
cid = get_cid_by_vid(args.vid)
vids += get_vids(cid)
if args.url:
vids += ask_input(url=args.url)
vids += ask_input(url=args.url, isall=isall)
if args.vid == "" and args.cid == "" and args.url == "":
vids += ask_input()
vids += ask_input(isall=isall)
if vids.__len__() <= 0:
sys.exit("没有任何有效输入")
vids_bak = vids
vids = []
for vid in vids_bak:
if vid in vids:
continue
else:
vids.append(vid)
vinfos = get_video_info_by_vid(vids)
subtitles = {}
for vinfo in vinfos:
comments, file_path = get_one_subtitle_by_vinfo(vinfo, args.font, args.font_size, args.y)
infos = get_one_subtitle_by_vinfo(vinfo, args.font, args.font_size, args=args)
if infos is None:
continue
comments, file_path = infos
comments = write_one_video_subtitles(file_path, comments, args)
subtitles.update({file_path:comments})
return subtitles

199
sites/sohu.py Normal file
View File

@@ -0,0 +1,199 @@
#!/usr/bin/env python3.7
# coding=utf-8
'''
# 作者: weimo
# 创建日期: 2020-01-16 17:45:35
# 上次编辑时间 : 2020-02-07 18:43:55
# 一个人的命运啊,当然要靠自我奋斗,但是...
'''
import json
import requests
from basic.vars import chrome
from pfunc.request_info import matchit
from pfunc.dump_to_ass import check_file, write_one_video_subtitles
def try_decode(content):
    """Decode raw response bytes, trying GBK first, then UTF-8.

    Sohu pages are usually GBK-encoded, hence that codec is attempted
    first. Returns the decoded str, or None when every codec fails.
    """
    # Return straight from the loop instead of the original
    # flag-and-break dance; the unused exception variable is dropped.
    for method in ("gbk", "utf-8"):
        try:
            return content.decode(method)
        except Exception:
            # Report and fall through to the next codec.
            print("try {} decode method failed.".format(method))
    return None
def get_vinfos_by_url(url: str):
    """Resolve a Sohu URL into a list of video-info entries.

    Recognizes three URL shapes: an episode page under /v/ (scrape the
    vid from the HTML), an album page /album/NNN.shtml, and a single
    video vNNN.shtml. Anything else is treated as a possible collection
    page and scraped for its playlistId. Each entry has the shape
    returned by get_vinfo_by_vid / get_vinfos; returns None on failure.
    """
    # Whichever pattern matches first decides the branch taken below.
    ep_url = matchit(["[\s\S]+?tv.sohu.com/v/(.+?)\.html", "[\s\S]+?tv.sohu.com/(.+?)/(.+?)\.html"], url)
    aid_url = matchit(["[\s\S]+?tv.sohu.com/album/.(\d+)\.shtml"], url)
    vid_url = matchit(["[\s\S]+?tv.sohu.com/v(\d+)\.shtml"], url)
    if ep_url:
        # Episode page: download the HTML and scrape "var vid" out of it.
        try:
            r = requests.get(url, headers=chrome, timeout=3).content
        except Exception as e:
            print(e)
            print("get sohu (url -> {}) ep url failed.".format(url))
            return
        r_decode = try_decode(r)
        if r_decode is None:
            print("ep response use decode failed(url -> {}).".format(url))
            return None
        vid = matchit(["[\s\S]+?var vid.+?(\d+)"], r_decode)
        if vid:
            vinfo = get_vinfo_by_vid(vid)
            if vinfo is None:
                return
            else:
                # Single video: wrap in a list for a uniform return type.
                return [vinfo]
        else:
            print("match sohu vid (url -> {}) failed.".format(url))
            return None
    if aid_url:
        # Album page: the captured group is the album (playlist) id.
        return get_vinfos(aid_url)
    if vid_url:
        vinfo = get_vinfo_by_vid(vid_url)
        if vinfo is None:
            return
        else:
            return [vinfo]
    if ep_url is None and aid_url is None and vid_url is None:
        # Possibly a collection page: scrape "var playlistId" from the HTML.
        try:
            r = requests.get(url, headers=chrome, timeout=3).content
        except Exception as e:
            print("get sohu (url -> {}) album url failed.".format(url))
            return
        r_decode = try_decode(r)
        if r_decode is None:
            print("album response decode failed(url -> {}).".format(url))
            return None
        aid = matchit(["[\s\S]+?var playlistId.+?(\d+)"], r_decode)
        if aid:
            return get_vinfos(aid)
    return
def get_vinfos(aid: str):
    """Fetch the full episode list of a Sohu album (playlist).

    Returns a list of [name, duration_seconds, vid, aid] entries, or
    None when the request fails or the album has no videos.
    """
    query = {
        "callback": "",
        "playlistid": aid,
        "o_playlistId": "",
        "pianhua": "0",
        "pagenum": "1",
        "pagesize": "999",
        "order": "0",  # 0 = ascending episode order
        "cnt": "1",
        "pageRule": "2",
        "withPgcVideo": "0",
        "ssl": "0",
        "preVideoRule": "3",
        "_": ""  # timestamp placeholder, e.g. 1579167883430
    }
    try:
        resp = requests.get("https://pl.hd.sohu.com/videolist",
                            params=query, headers=chrome, timeout=3)
        body = resp.content.decode("gbk")
    except Exception:
        print("get sohu (aid -> {}) videolist failed.".format(aid))
        return None
    videos = json.loads(body).get("videos")
    if not videos:
        print("videolist has no videos (aid -> {}).".format(aid))
        return None
    return [[v["name"], int(float(v["playLength"])), v["vid"], aid]
            for v in videos]
def get_vinfo_by_vid(vid: str):
    """Look up a single Sohu video by vid via the vrs_flash API.

    Returns [name, duration_seconds, vid, aid] (aid is "" when the
    video belongs to no album), or None on any failure.
    """
    query = {
        "vid": vid,
        "ver": "31",
        "ssl": "1",
        "pflag": "pch5"
    }
    try:
        raw = requests.get("https://hot.vrs.sohu.com/vrs_flash.action",
                           params=query, headers=chrome,
                           timeout=3).content.decode("utf-8")
    except Exception:
        print("get sohu (vid -> {}) vinfo failed.".format(vid))
        return None
    payload = json.loads(raw)
    # Guard clauses replace the original nested if/else ladder.
    if payload.get("status") != 1:
        print("vid -> {} vinfo request return error.".format(vid))
        return
    aid = str(payload["pid"]) if payload.get("pid") else ""
    info = payload.get("data")
    if not info:
        print("vid -> {} vinfo request return no data.".format(vid))
        return
    return [info["tvName"], int(float(info["totalDuration"])), vid, aid]
def get_danmu_all_by_vid(vid: str, aid: str, duration: int):
    """Download every danmu (bullet comment) of a Sohu video in one call.

    Returns a list of [text, color, timepoint] triples sorted by
    timepoint, or None when the request fails.
    """
    query = {
        "act": "dmlist_v2",
        "dct": "1",
        "request_from": "h5_js",
        "vid": vid,
        "page": "1",
        "pct": "2",
        "from": "PlayerType.SOHU_VRS",
        "o": "4",
        "aid": aid,
        "time_begin": "0",
        "time_end": str(duration)
    }
    try:
        raw = requests.get("https://api.danmu.tv.sohu.com/dmh5/dmListAll",
                           params=query, headers=chrome,
                           timeout=3).content.decode("utf-8")
    except Exception:
        print("get sohu (vid -> {}) danmu failed.".format(vid))
        return None
    entries = json.loads(raw)["info"]["comments"]
    # "c" is the comment text, "v" its playback timepoint.
    triples = [[entry["c"], "ffffff", entry["v"]] for entry in entries]
    return sorted(triples, key=lambda item: item[-1])
def main(args):
    """Sohu entry point: collect target videos from args, then download
    danmu and write one subtitle file per video.
    Returns a {file_path: comments} mapping.
    """
    targets = []
    if args.vid:
        single = get_vinfo_by_vid(args.vid)
        if single:
            targets.append(single)
    if args.aid:
        album = get_vinfos(args.aid)
        if album:
            targets.extend(album)
    # Nothing supplied on the command line: prompt for a URL interactively.
    if args.vid == "" and args.aid == "" and args.url == "":
        args.url = input("请输入sohu链接\n")
    if args.url:
        from_url = get_vinfos_by_url(args.url)
        if from_url:
            targets.extend(from_url)
    subtitles = {}
    for name, duration, vid, aid in targets:
        print(name, "开始下载...")
        flag, file_path = check_file(name, args)
        if flag is False:
            print("跳过{}".format(name))
            continue
        comments = get_danmu_all_by_vid(vid, aid, duration)
        if comments is None:
            print(name, "弹幕获取失败了,记得重试~(@^_^@)~")
            continue
        comments = write_one_video_subtitles(file_path, comments, args)
        subtitles[file_path] = comments
        print(name, "下载完成!")
    return subtitles

141
sites/youku.py Normal file
View File

@@ -0,0 +1,141 @@
#!/usr/bin/env python3.7
# coding=utf-8
'''
# 作者: weimo
# 创建日期: 2020-01-05 14:52:21
# 上次编辑时间 : 2020-01-16 19:59:08
# 一个人的命运啊,当然要靠自我奋斗,但是...
'''
import re
import time
import json
import base64
import requests
from basic.vars import chrome
from pfunc.dump_to_ass import check_file, write_one_video_subtitles
from pfunc.cfunc import yk_msg_sign, yk_t_sign
from pfunc.request_info import get_vinfos_by_show_id, get_vinfos_by_video_id, get_vinfos_by_url_youku
def get_tk_enc():
    """Fetch Youku's _m_h5_tk / _m_h5_tk_enc token cookies.

    Returns the cookie dict when both tokens are present, else None.
    """
    api_url = ("https://acs.youku.com/h5/mtop.com.youku.aplatform.weakget/1.0/"
               "?jsv=2.5.1&appKey=24679788")
    try:
        response = requests.get(api_url, headers=chrome, timeout=5)
    except Exception:
        return None
    cookies = dict(response.cookies)
    if cookies.get("_m_h5_tk") and cookies.get("_m_h5_tk_enc"):
        return cookies
    return None
def get_cna():
    """Fetch the 'cna' device-id cookie needed to request Youku danmu.

    Returns the cna string, or None when the request fails or the
    cookie is absent/empty.
    """
    try:
        response = requests.get("https://log.mmstat.com/eg.js",
                                headers=chrome, timeout=5)
    except Exception:
        return None
    cna = dict(response.cookies).get("cna")
    if cna:
        return cna
    return None
def get_danmu_by_mat(vid, cna, mat: int, comments: list):
    """Download one minute-bucket ("mat") of Youku danmu and append the
    parsed entries to *comments* in place.

    Returns the shared *comments* list on success, None when the token
    cookies cannot be fetched, and the sentinel string "once again"
    when the HTTP request itself failed (the caller may retry).
    """
    api_url = "https://acs.youku.com/h5/mopen.youku.danmu.list/1.0/"
    tm = str(int(time.time() * 1000))
    msg = {
        "ctime": tm,
        "ctype": 10004,
        "cver": "v1.0",
        "guid": cna,
        "mat": mat,
        "mcount": 1,
        "pid": 0,
        "sver": "3.1.0",
        "type": 1,
        "vid": vid}
    # Order matters: the base64 payload and its inner sign are computed
    # over msg *before* the "msg"/"sign" keys themselves are added.
    msg_b64encode = base64.b64encode(json.dumps(msg, separators=(',', ':')).encode("utf-8")).decode("utf-8")
    msg.update({"msg":msg_b64encode})
    msg.update({"sign":yk_msg_sign(msg_b64encode)})
    # Testing showed that just having the _m_h5_tk and _m_h5_tk_enc
    # cookies is enough for the API to accept the request.
    tk_enc = get_tk_enc()
    if tk_enc is None:
        return
    headers = {
        "Content-Type":"application/x-www-form-urlencoded",
        "Cookie":";".join([k + "=" + v for k, v in tk_enc.items()]),
        "Referer": "https://v.youku.com"
    }
    headers.update(chrome)
    t = str(int(time.time() * 1000))
    # Re-dump msg *with* the msg/sign fields as the POST body.
    data = json.dumps(msg, separators=(',', ':'))
    params = {
        "jsv":"2.5.6",
        "appKey":"24679788",
        "t":t,
        # Outer request signature derived from the first 32 chars of the
        # _m_h5_tk token — see pfunc.cfunc.yk_t_sign.
        "sign":yk_t_sign(tk_enc["_m_h5_tk"][:32], t, "24679788", data),
        "api":"mopen.youku.danmu.list",
        "v":"1.0",
        "type":"originaljson",
        "dataType":"jsonp",
        "timeout":"20000",
        "jsonpIncPrefix":"utility"
    }
    try:
        r = requests.post(api_url, params=params, data={"data":data}, headers=headers, timeout=5).content.decode("utf-8")
    except Exception as e:
        print("youku danmu request failed.", e)
        # Transient failure sentinel understood by main().
        return "once again"
    # The response's "data.result" field is itself a JSON string.
    result = json.loads(json.loads(r)["data"]["result"])["data"]["result"]
    for item in result:
        comment = item["content"]
        # "propertis" (sic — API's own typo) holds a JSON blob whose
        # "color" may arrive as int or decimal string.
        c_int = json.loads(item["propertis"])["color"]
        if c_int.__class__ == str:
            c_int = int(c_int)
        # Render as a 6-digit hex RGB string.
        color = hex(c_int)[2:].zfill(6)
        # "playat" is in milliseconds; convert to seconds.
        timepoint = item["playat"] / 1000
        comments.append([comment, [color], timepoint])
    return comments
def main(args):
    """Youku entry point: resolve videos from url/vid arguments, then
    download danmu minute by minute and write one subtitle file per
    video. Returns a {file_path: comments} mapping.
    """
    cna = get_cna()
    if cna is None:
        # Without the cna device id nothing below can work; bail early.
        return
    isall = bool(args.series)
    vinfos = []
    if args.url:
        vi = get_vinfos_by_url_youku(args.url, isall=isall)
        if vi:
            vinfos += vi
    if args.vid:
        vi = get_vinfos_by_video_id(args.vid, isall=isall)
        if vi:
            vinfos += vi
    subtitles = {}
    for name, duration, video_id in vinfos:
        print(name, "开始下载...")
        flag, file_path = check_file(name, args=args)
        if flag is False:
            print("跳过{}".format(name))
            continue
        max_mat = duration // 60 + 1
        comments = []
        for mat in range(max_mat):
            # Retry a transient failure ("once again") exactly once.
            # Bug fix: previously a second consecutive failure assigned
            # the sentinel string itself to `comments`, corrupting every
            # later minute and the written subtitle file.
            result = get_danmu_by_mat(video_id, cna, mat + 1, comments)
            if result == "once again":
                result = get_danmu_by_mat(video_id, cna, mat + 1, comments)
            if result is None or result == "once again":
                continue
            comments = result
            print("已下载{}/{}".format(mat + 1, max_mat))
        comments = write_one_video_subtitles(file_path, comments, args)
        subtitles.update({file_path:comments})
    return subtitles