diff --git a/GetDanMu.py b/GetDanMu.py index c2f34c0..dd15fe0 100644 --- a/GetDanMu.py +++ b/GetDanMu.py @@ -1,9 +1,9 @@ -#!/usr/bin/env python +#!/usr/bin/env python3.7 # coding=utf-8 ''' # 作者: weimo -# 创建日期: 2020-01-04 12:59:11 -# 上次编辑时间 : 2020-01-04 20:14:39 +# 创建日期: 2020-01-04 19:14:39 +# 上次编辑时间: 2020-01-05 14:47:53 # 一个人的命运啊,当然要靠自我奋斗,但是... ''' @@ -13,10 +13,7 @@ from argparse import ArgumentParser from sites.qq import main as qq from sites.iqiyi import main as iqiyi -from basic.ass import get_ass_head, check_font -from pfunc.dump_to_ass import write_lines_to_file -from methods.assbase import ASS -from methods.sameheight import SameHeight +from pfunc.cfunc import check_url_site # ------------------------------------------- # 基本流程 @@ -32,7 +29,7 @@ def main(): parser = ArgumentParser(description="视频网站弹幕转换/下载工具,任何问题请联系vvtoolbox.dev@gmail.com") parser.add_argument("-f", "--font", default="微软雅黑", help="指定输出字幕字体") parser.add_argument("-fs", "--font-size", default=28, help="指定输出字幕字体大小") - parser.add_argument("-s", "--site", default="qq", help="指定网站") + parser.add_argument("-s", "--site", default="", help="指定网站") parser.add_argument("-cid", "--cid", default="", help="下载cid对应视频的弹幕(腾讯视频合集)") parser.add_argument("-vid", "--vid", default="", help="下载vid对应视频的弹幕,支持同时多个vid,需要用逗号隔开") parser.add_argument("-aid", "--aid", default="", help="下载aid对应视频的弹幕(爱奇艺合集)") @@ -41,20 +38,15 @@ def main(): parser.add_argument("-y", "--y", action="store_true", help="覆盖原有弹幕而不提示") args = parser.parse_args() # print(args.__dict__) - font_path, font_style_name = check_font(args.font) - ass_head = get_ass_head(font_style_name, args.font_size) + if args.url != "": + args.site = check_url_site(args.url) + if args.site == "": + args.site = input("请输入站点(qq/iqiyi/youku):\n") if args.site == "qq": subtitles = qq(args) if args.site == "iqiyi": subtitles = iqiyi(args) - for file_path, comments in subtitles.items(): - get_xy_obj = SameHeight("那就写这一句作为初始化测试吧!", font_path=font_path, font_size=args.font_size) 
- subtitle = ASS(file_path, get_xy_obj, font=font_style_name) - for comment in comments: - subtitle.create_new_line(comment) - write_lines_to_file(ass_head, subtitle.lines, file_path) - if __name__ == "__main__": # 打包 --> pyinstaller GetDanMu.spec main() \ No newline at end of file diff --git a/README.md b/README.md index 62a2024..2fde5cc 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,30 @@ + # GetDanMu + 转换/下载各类视频的弹幕 + +## 网站支持 +| Site | URL | 单集? | 合集? | 综艺合集? | +| :--: | :-- | :-----: | :-----: | :-----: | +| **腾讯视频** | |✓|✓| | +| **爱奇艺** | |✓|✓|✓| + +## 可能存在的问题 +- 下载进度接近100%时暂时没有反应 + +这是因为在全部弹幕获取完后一次性处理所致,对于时间过长和弹幕过多的视频,处理耗时较多,属于正常现象。 +- 命令组合未达到预期效果 + +当前的逻辑并不完善,如果出现这种现象请反馈给我。 + +# 更新日志 + +## 2020/1/5 + +- 增加了通过链接下载爱奇艺视频弹幕的方法,支持综艺合集。 +- 增加通过链接判断网站 \ No newline at end of file diff --git a/basic/ass.py b/basic/ass.py index 4193444..f11b57c 100644 --- a/basic/ass.py +++ b/basic/ass.py @@ -1,9 +1,9 @@ -#!/usr/bin/env python +#!/usr/bin/env python3.7 # coding=utf-8 ''' # 作者: weimo -# 创建日期: 2020-01-04 13:05:23 -# 上次编辑时间 : 2020-01-04 15:52:11 +# 创建日期: 2020-01-04 19:14:46 +# 上次编辑时间: 2020-01-05 14:45:55 # 一个人的命运啊,当然要靠自我奋斗,但是... ''' diff --git a/basic/vars.py b/basic/vars.py index 9e1c45d..7ecec75 100644 --- a/basic/vars.py +++ b/basic/vars.py @@ -1,9 +1,9 @@ -#!/usr/bin/env python +#!/usr/bin/env python3.7 # coding=utf-8 ''' # 作者: weimo -# 创建日期: 2020-01-04 13:16:18 -# 上次编辑时间 : 2020-01-04 16:08:34 +# 创建日期: 2020-01-04 19:14:35 +# 上次编辑时间: 2020-01-05 14:46:15 # 一个人的命运啊,当然要靠自我奋斗,但是... 
''' qqlive = { @@ -12,6 +12,9 @@ qqlive = { iqiyiplayer = { "User-Agent":"Qiyi List Client PC 7.2.102.1343" } +chrome = { + "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36" +} fonts = { "微软雅黑":"msyh.ttc", "微软雅黑粗体":"msyhbd.ttc", diff --git a/methods/assbase.py b/methods/assbase.py index 8b94e0a..52a67fa 100644 --- a/methods/assbase.py +++ b/methods/assbase.py @@ -1,9 +1,9 @@ -#!/usr/bin/env python +#!/usr/bin/env python3.7 # coding=utf-8 ''' # 作者: weimo -# 创建日期: 2020-01-04 13:01:04 -# 上次编辑时间 : 2020-01-04 15:42:02 +# 创建日期: 2020-01-04 19:14:32 +# 上次编辑时间: 2020-01-05 14:46:27 # 一个人的命运啊,当然要靠自我奋斗,但是... ''' diff --git a/methods/sameheight.py b/methods/sameheight.py index ec2835c..f60143b 100644 --- a/methods/sameheight.py +++ b/methods/sameheight.py @@ -1,12 +1,13 @@ -#!/usr/bin/env python +#!/usr/bin/env python3.7 # coding=utf-8 ''' # 作者: weimo -# 创建日期: 2019-12-25 20:35:43 -# 上次编辑时间 : 2019-12-25 23:23:32 +# 创建日期: 2020-01-04 19:14:47 +# 上次编辑时间: 2020-01-05 14:46:51 # 一个人的命运啊,当然要靠自我奋斗,但是... ''' + from PIL.ImageFont import truetype class SameHeight(object): diff --git a/pfunc/cfunc.py b/pfunc/cfunc.py new file mode 100644 index 0000000..8029fe7 --- /dev/null +++ b/pfunc/cfunc.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3.7 +# coding=utf-8 +''' +# 作者: weimo +# 创建日期: 2020-01-05 12:45:18 +# 上次编辑时间: 2020-01-05 14:44:42 +# 一个人的命运啊,当然要靠自我奋斗,但是... 
# ---- pfunc/cfunc.py ----

def _url_host(url):
    """Return the lower-cased host name of ``url``, or ``""`` when it has none.

    ``urlparse`` only recognises a network location after ``//``, so a URL
    typed without a scheme ("www.iqiyi.com/v_x.html") is re-parsed with a
    leading ``//`` instead of being reported as host-less.
    """
    text = (url or "").strip()
    if "://" not in text and not text.startswith("//"):
        text = "//" + text
    try:
        return (urlparse(text).hostname or "").rstrip(".")
    except ValueError:
        # urlparse rejects malformed bracketed (IPv6-style) hosts.
        return ""


def check_url_site(url):
    """Return the second-to-last label of the URL's host, e.g. ``"iqiyi"``.

    Args:
        url: Page URL, with or without a scheme.

    Returns:
        The label just before the top-level domain ("www.iqiyi.com" ->
        "iqiyi"), or ``""`` when the host has fewer than two labels.  The
        empty string is the same value the ``--site`` option defaults to,
        so the caller can fall back to asking the user for the site.
    """
    labels = _url_host(url).split(".")
    if len(labels) < 2:
        return ""
    return labels[-2]


def check_url_locale(url):
    """Return ``"zh_tw"`` when the host's first label is ``tw``, else ``"zh_cn"``.

    Args:
        url: Page URL, with or without a scheme.
    """
    first_label = _url_host(url).split(".")[0]
    return "zh_tw" if first_label == "tw" else "zh_cn"


# ---- pfunc/dump_to_ass.py ----

def write_one_video_subtitles(file_path, comments, args):
    """Build the ASS subtitle lines for one video and write them to ``file_path``.

    Args:
        file_path: Output path handed to ``ASS`` and ``write_lines_to_file``.
        comments: Iterable of comment records; each one is passed unchanged
            to ``ASS.create_new_line``.
        args: Parsed command-line namespace; ``args.font`` and
            ``args.font_size`` are read.
    """
    # The font is resolved once per video rather than once per run. The
    # original author kept it at this level in case a font is specified
    # per video later on.
    font_path, font_style_name = check_font(args.font)
    ass_head = get_ass_head(font_style_name, args.font_size)
    # The sample sentence only initialises the SameHeight helper.
    get_xy_obj = SameHeight("那就写这一句作为初始化测试吧!", font_path=font_path, font_size=args.font_size)
    subtitle = ASS(file_path, get_xy_obj, font=font_style_name)
    for comment in comments:
        subtitle.create_new_line(comment)
    write_lines_to_file(ass_head, subtitle.lines, file_path)
#-------------------------------------------iqiyi--------------------------------------------

def _get_text(url):
    """GET ``url`` with the ``chrome`` headers (5 s timeout) and return the body decoded as UTF-8."""
    return requests.get(url, headers=chrome, timeout=5).content.decode("utf-8")


def get_vinfos(aid, locale="zh_cn"):
    """Fetch the video list of album ``aid`` from the iqiyi ``avlist`` endpoint.

    Args:
        aid: Album id, formatted into the request URL.
        locale: Appended as ``?locale=...`` unless it is ``"zh_cn"``.

    Returns:
        A list of ``[name, duration, tvid]`` entries, where ``name`` is
        ``shortTitle + "_" + timeLength``; ``None`` when the request fails
        or the response does not have the expected layout.
    """
    api_url = "http://cache.video.iqiyi.com/avlist/{}/0/".format(aid)
    if locale != "zh_cn":
        api_url += "?locale=" + locale
    try:
        r = _get_text(api_url)
    except Exception as e:
        print("get_vinfos requests error info -->", e)
        return None
    try:
        # The JSON payload is expected to follow a "var videoListC=" prefix.
        data = json.loads(r[len("var videoListC="):])
        vlist = data["data"]["vlist"]
        # The third field is the video's own id (the original stored the
        # literal list ["id"] here by mistake).
        return [[v["shortTitle"] + "_" + str(v["timeLength"]), v["timeLength"], v["id"]] for v in vlist]
    except (ValueError, KeyError, TypeError) as e:
        print("get_vinfos load vlist error info -->", e)
        return None


def matchit(patterns, text):
    """Return group 1 of the first pattern that ``re.match``-es ``text``, else ``None``.

    Patterns are tried in order; matching is anchored at the start of
    ``text`` (``re.match``), so patterns meant to find something anywhere
    must begin with their own wildcard prefix.
    """
    for pattern in patterns:
        match = re.match(pattern, text)
        if match:
            return match.group(1)
    return None


def duration_to_sec(duration):
    """Convert ``"H:M:S"``, ``"M:S"`` or ``"S"`` text into a number of seconds."""
    parts = duration.split(":")
    # Keep only as many weights (from the right) as there are fields.
    weights = [3600, 60, 1][3 - len(parts):]
    return sum(weight * int(part) for weight, part in zip(weights, parts))


def get_year_range(aid, locale="zh_cn"):
    """Return the list of years from the album's first video to its latest one.

    The years are the first four characters of ``firstVideo.period`` and
    ``latestVideo.period`` in the album ``baseinfo`` response.  On any
    request or parsing error the result is ``[current year]``.
    """
    this_year = localtime().tm_year
    year_start = year_end = this_year
    api_url = "http://pcw-api.iqiyi.com/album/album/baseinfo/{}".format(aid)
    if locale != "zh_cn":
        api_url += "?locale=" + locale
    try:
        r = _get_text(api_url)
    except Exception as e:
        print("error info -->", e)
        return [this_year]
    try:
        data = json.loads(r)["data"]
        if data.get("firstVideo"):
            year_start = int(data["firstVideo"]["period"][:4])
        if data.get("latestVideo"):
            year_end = int(data["latestVideo"]["period"][:4])
    except (ValueError, KeyError, TypeError, AttributeError) as e:
        print("get_year_range load data error info -->", e)
        return [this_year]
    return list(range(year_start, year_end + 1))


def get_vinfo_by_tvid(tvid, locale="zh_cn"):
    """Fetch name and duration of one video from the ``video/baseinfo`` endpoint.

    Returns:
        ``[name + "_" + durationSec, durationSec, tvid]`` for the video, or
        ``None`` when the request fails or the response carries no usable
        ``data`` object.
    """
    api_url = "https://pcw-api.iqiyi.com/video/video/baseinfo/{}".format(tvid)
    if locale != "zh_cn":
        api_url += "?locale=" + locale
    try:
        r = _get_text(api_url)
    except Exception as e:
        print("error info -->", e)
        return None
    try:
        data = json.loads(r)["data"]
    except (ValueError, KeyError, TypeError) as e:
        print("get_vinfo_by_tvid load data error info -->", e)
        return None
    if not isinstance(data, dict):
        return None
    try:
        name = data["name"]
        duration = data["durationSec"]
    except KeyError as e:
        print("get_vinfo_by_tvid load data error info -->", e)
        return None
    return [name + "_" + str(duration), duration, tvid]


def get_vinfos_by_year(aid, years: list, cid=6, locale="zh_cn"):
    """Fetch the videos of source ``aid`` for the given years (``svlistinfo`` endpoint).

    Args:
        aid: Sent as ``sourceid``.
        years: Years to request; items may be ints or strings.
        cid: Channel id sent with the request.
        locale: Appended as ``&locale=...`` unless it is ``"zh_cn"``.

    Returns:
        A list of ``[shortTitle + "_" + seconds, seconds, tvId]`` entries in
        the order of ``years``; ``None`` when the request fails or the
        response does not have the expected layout.
    """
    timelist = ",".join(str(year) for year in years)
    api_url = "https://pcw-api.iqiyi.com/album/source/svlistinfo?cid={}&sourceid={}&timelist={}".format(cid, aid, timelist)
    if locale != "zh_cn":
        api_url += "&locale=" + locale
    try:
        r = _get_text(api_url)
    except Exception as e:
        print("get_vinfos_by_year error info -->", e)
        return None
    try:
        data = json.loads(r)["data"]
        vinfos = []
        for year in years:
            # The response is keyed by the year as a string.
            episodes = data.get(str(year))
            if episodes is None:
                continue
            for ep in episodes:
                sec = duration_to_sec(ep["duration"])
                vinfos.append([ep["shortTitle"] + "_" + str(sec), sec, ep["tvId"]])
    except (ValueError, KeyError, TypeError, AttributeError) as e:
        print("get_vinfos_by_year load data error info -->", e)
        return None
    return vinfos


def get_vinfos_by_url(url):
    """Resolve an iqiyi page URL into a list of ``[name, duration, tvid]`` entries.

    ``/w_*.html`` and ``/v_*.html`` URLs are handled as a single video
    (looked up by the ``tvid`` found in the page); ``/a_*.html`` and
    ``/lib/m_*.html`` URLs are handled as an album (looked up by the album
    id found in the page).  An album whose channel id is ``"6"`` is listed
    year by year, which the original author's comment attributes to
    variety-show collections.

    Returns:
        A list of vinfo entries, so the caller can extend its own list with
        it; ``None`` when the URL is not recognised, the page cannot be
        downloaded, or the required id is missing from the page.
    """
    patterns = [r".+?/w_(\w+?).html", r".+?/v_(\w+?).html", r".+?/a_(\w+?).html", r".+?/lib/m_(\w+?).html"]
    isw, isep, isas, isms = [re.match(pattern, url) for pattern in patterns]
    if not (isw or isep or isas or isms):
        return None
    locale = check_url_locale(url)
    try:
        r = _get_text(url)
    except Exception as e:
        print("get_vinfos_by_url error info -->", e)
        return None
    cid_patterns = [r'[\s\S]+?\.cid.+?(\d+)', r'[\s\S]+?cid: "(\d+)"', r'[\s\S]+?channelID.+?"(\d+)"']
    cid = matchit(cid_patterns, r)
    aid_patterns = [r"[\s\S]+?aid:'(\d+)'", r'[\s\S]+?albumid="(\d+)"', r'[\s\S]+?movlibalbumaid="(\d+)"', r'[\s\S]+?data-score-tvid="(\d+)"']
    aid = matchit(aid_patterns, r)
    tvid_patterns = [r'[\s\S]+?"tvid":"(\d+)"', r'[\s\S]+?\[\'tvid\'\].+?"(\d+)"']
    tvid = matchit(tvid_patterns, r)

    if isep or isw:
        if tvid is None:
            return None
        vinfo = get_vinfo_by_tvid(tvid, locale=locale)
        # Wrap the single entry so every successful result is a list of vinfos.
        return [vinfo] if vinfo else None

    # Album or library page.
    if aid is None:
        return None
    if cid == "6":
        # Year-by-year listing needs the span of years the album covers.
        years = get_year_range(aid, locale=locale)
        return get_vinfos_by_year(aid, years, locale=locale)
    return get_vinfos(aid, locale=locale)

#-------------------------------------------iqiyi--------------------------------------------
2019-12-18 09:48:36 -# 上次编辑时间 : 2020-01-04 17:54:46 +# 创建日期: 2020-01-04 19:14:41 +# 上次编辑时间: 2020-01-05 14:45:17 # 一个人的命运啊,当然要靠自我奋斗,但是... ''' @@ -15,24 +15,10 @@ from xmltodict import parse from basic.vars import iqiyiplayer from basic.ass import check_content -from pfunc.dump_to_ass import check_file -from pfunc.request_info import get_vinfos +from pfunc.dump_to_ass import check_file, write_one_video_subtitles +from pfunc.request_info import get_vinfos, get_vinfos_by_url, get_vinfo_by_tvid -def get_vinfo_by_tvid(tvid): - api_url = "https://pcw-api.iqiyi.com/video/video/baseinfo/{}".format(tvid) - try: - r = requests.get(api_url, headers=iqiyiplayer).content.decode("utf-8") - except Exception as e: - print("error info -->", e) - return - data = json.loads(r)["data"] - if data.__class__ != dict: - return None - name = data["name"] - duration = data["durationSec"] - return [name + "_" + str(duration), duration, tvid] - def get_danmu_by_tvid(name, duration, tvid): # http://cmts.iqiyi.com/bullet/41/00/10793494100_300_3.z if tvid.__class__ == int: @@ -50,7 +36,11 @@ def get_danmu_by_tvid(name, duration, tvid): except Exception as e: print("error info -->", e) continue - raw_xml = decompress(bytearray(r), 15+32).decode('utf-8') + try: + raw_xml = decompress(bytearray(r), 15+32).decode('utf-8') + except Exception as e: + index += 1 + continue try: entry = parse(raw_xml)["danmu"]["data"]["entry"] except Exception as e: @@ -62,6 +52,8 @@ def get_danmu_by_tvid(name, duration, tvid): if entry.__class__ != list: entry = [entry] for comment in entry: + if comment.get("list") is None: + continue bulletInfo = comment["list"]["bulletInfo"] if bulletInfo.__class__ != list: bulletInfo = [bulletInfo] @@ -89,10 +81,12 @@ def main(args): vi = get_vinfos(args.aid) if vi: vinfos += vi - # if args.url: - # vi = get_vinfos_by_url(args.url) - # if vi: - # vinfos += vi + if args.tvid == "" and args.aid == "" and args.url == "": + args.url = input("请输入iqiyi链接:\n") + if args.url: + vi = 
get_vinfos_by_url(args.url) + if vi: + vinfos += vi subtitles = {} for name, duration, tvid in vinfos: print(name, "开始下载...") @@ -101,5 +95,6 @@ def main(args): print("跳过{}".format(name)) return comments = get_danmu_by_tvid(name, duration, tvid) + write_one_video_subtitles(file_path, comments, args) subtitles.update({file_path:comments}) return subtitles \ No newline at end of file diff --git a/sites/qq.py b/sites/qq.py index 2df4d2d..617c70b 100644 --- a/sites/qq.py +++ b/sites/qq.py @@ -1,9 +1,9 @@ -#!/usr/bin/env python +#!/usr/bin/env python3.7 # coding=utf-8 ''' # 作者: weimo -# 创建日期: 2019-12-18 09:37:15 -# 上次编辑时间 : 2020-01-04 17:53:28 +# 创建日期: 2020-01-04 19:14:37 +# 上次编辑时间: 2020-01-05 14:47:36 # 一个人的命运啊,当然要靠自我奋斗,但是... ''' @@ -14,7 +14,7 @@ import requests from basic.vars import qqlive from basic.ass import check_content -from pfunc.dump_to_ass import check_file +from pfunc.dump_to_ass import check_file, write_one_video_subtitles from pfunc.request_info import get_all_vids_by_cid as get_vids from pfunc.request_info import get_danmu_target_id_by_vid as get_target_id @@ -158,6 +158,7 @@ def main(args): subtitles = {} for vinfo in vinfos: comments, file_path = get_one_subtitle_by_vinfo(vinfo, args.font, args.font_size, args.y) + write_one_video_subtitles(file_path, comments, args) subtitles.update({file_path:comments}) return subtitles