查看原文
其他

百度网盘批量转存.py

carry麦 码农真经 2023-12-25

现在好多资源都放在网盘里,有时候拿到一个资源列表,发现全是网盘链接跟提取码,一个一个去转存很麻烦。于是我写了个 Python 脚本,可以批量转存这些资源。


视频演示






具体步骤




1 登录百度网盘,获取cookie
要使用百度网盘批量转存工具,需要先获取百度 Cookie。首先登录 https://pan.baidu.com/disk/home;这里以 Chrome 为例:打开开发者工具,进入 Network 面板,选中任意一个请求,在请求头(Request Headers)中找到 Cookie,然后把这一整串值复制出来。

将cookie粘贴到代码中。

2 准备好要下载的链接。
如,这是一个百度盘资源下载链接及提取码。

将该下载链接保存到文本文件。

3 脚本的使用方法:

百度网盘文件批量转存 (如果检测到没有登录信息,会提示扫码登录)
optional arguments:
  -h, --help            show this help message and exit
  -p PATH, --path PATH  必选,存有下载链接的文本文件路径
                        1. 文本里面链接和密码不要在同一行
                        2. 文本里面可以有多余的文字,目前支持一定程度的模糊匹配
                        3. 链接所在的那一行,链接后面除了空格外不要有多余文字
  -s SAVE_FOLDER, --save_folder SAVE_FOLDER
                        可选,在百度网盘中的存储路径,默认存储在根目录
  -c COOKIES, --cookies COOKIES
                        可选,设置 Cookie
  -e {raise,ignore}, --errors {raise,ignore}
                        可选,遇到错误的处理方式:
                        raise: 报错然后停止程序,默认;
                        ignore: 忽略而后进行下一个
  -H HEADER, --header HEADER
                        可选,请求头

4 运行命令
命令主要形式如下:
python 百度网盘批量转存.py -p download_links.txt -s "/网盘路径"
其中 -s 指定的网盘路径需要事先创建;不带 -s 参数时默认转存到根目录。


5 查看效果
链接所包含的资源。

转存到我百度盘的资源



完整代码




#! /usr/bin/env python3

from base64 import b64encode
from json import loads
from re import compile as re_compile
from time import time
from typing import Optional, Union
from urllib.parse import urlencode, unquote
from uuid import uuid4

import requests

__version__ = (0, 0, 3)
__author__ = 'ChenyangGao <https://github.com/ChenyangGao>'

# TODO: paste your Cookie after "Cookie: " below.
# The text is parsed as HTTP headers, one "Key: value" pair per line.
HEADERS = '''Cookie: '''

def _report_open_failure(path, exc):
    'Tell the user to open `path` by hand when no opener could be launched.'
    print('无法自动打开 %r (%r),请手动打开该文件' % (path, exc))


# `startfile(path)` opens a file or folder with the platform's default
# application. Opening is best-effort: if the opener cannot be launched
# (for example `xdg-open` is not installed), a hint is printed instead of
# aborting the caller.
try:
    # os.startfile only exists on Windows; elsewhere the attribute lookup
    # raises AttributeError and a fallback is chosen below.
    def startfile(path, _func=__import__('os').startfile):
        'Open a file or folder (Windows).'
        try:
            _func(path)
        except OSError as exc:
            _report_open_failure(path, exc)
except AttributeError:
    _PLATFORM_SYSTEM = __import__('platform').system()
    if _PLATFORM_SYSTEM == 'Linux':
        def startfile(path, _func=__import__('subprocess').Popen):
            'Open a file or folder via `xdg-open` (Linux).'
            try:
                _func(['xdg-open', path])
            except OSError as exc:
                _report_open_failure(path, exc)
    elif _PLATFORM_SYSTEM == 'Darwin':
        def startfile(path, _func=__import__('subprocess').Popen):
            'Open a file or folder via `open` (macOS).'
            try:
                _func(['open', path])
            except OSError as exc:
                _report_open_failure(path, exc)
    else:
        def startfile(path, _func=lambda x: x):
            'Placeholder for unknown platforms: does not actually open anything.'
            # TODO: the path could be opened with a web browser instead
            _func(path)
    del _PLATFORM_SYSTEM

def text_to_dict(
    text: str,
    item_sep: str = '\n',
    kv_sep: str = '=',
) -> dict:
    '''Parse delimited "key<kv_sep>value" text into a dict.

    :param text: the text to parse
    :param item_sep: separator between items, a newline by default
    :param kv_sep: separator between a key and its value; only the first
        occurrence in an item splits it, so values may contain `kv_sep`

    :return: dict mapping each key to its value (both left unstripped);
        when a key repeats, the last occurrence wins

    :raise ValueError: if a non-blank item does not contain `kv_sep`
    '''
    result = {}
    for item in text.split(item_sep):
        # Empty and whitespace-only items (blank lines, trailing separators)
        # carry no data, so they are skipped instead of failing the parse.
        if not item.strip():
            continue
        key, sep, value = item.partition(kv_sep)
        if not sep:
            raise ValueError(
                'missing separator %r in item %r' % (kv_sep, item))
        result[key] = value
    return result

class TransferError(Exception):
    'Base class for the errors raised by this script.'


class Errno(TransferError):
    'Raised when an API response carries a non-zero `errno`.'

class DuPanTransfer:
    '''Save ("transfer") Baidu Netdisk share links into the logged-in account.

    All requests go through one `requests` session held in `self.session`.
    `self.logid` is the base64 form of the `BAIDUID` cookie and is sent as
    the `logid` query parameter of the API calls.
    '''

    # Share-link shapes accepted by `verify`; each captures the short URL id.
    _SHARE_URL_PATTERNS = (
        re_compile(r'https?://pan\.baidu\.com/s/1(?P<shorturl>[^?&#]+).*'),
        re_compile(r'https?://pan\.baidu\.com/share/init\?(?:.*?&)?surl=(?P<shorturl>[^&#]+).*'),
    )

    def __init__(self, headers: Union[dict, str, None] = None):
        '''Create the session and make sure it is logged in.

        :param headers: request headers, either a dict or text with one
            "Key: value" pair per line; defaults to the module-level `HEADERS`
            (looked up at call time)

        :raise ValueError: if header text cannot be parsed
        :raise TransferError: if no `BAIDUID` cookie was supplied and the
            user declines the QR-code login
        '''
        if headers is None:
            headers = HEADERS
        if isinstance(headers, str):
            try:
                headers = text_to_dict(headers, kv_sep=': ')
            except ValueError as exc:
                raise ValueError('请求头格式错误,正确形如\nkey1: value1\nkey2: value2\n...') from exc
        else:
            # Work on a copy so the caller's dict is never modified.
            headers = dict(headers)

        session = self.session = requests.session()
        if 'BAIDUID=' in headers.get('Cookie', ''):
            session.headers.update(headers)
            cookiejar = requests.cookies.cookiejar_from_dict(
                text_to_dict(
                    session.headers['Cookie'],
                    item_sep='; ',
                    kv_sep='=',
                )
            )
        else:
            answer = input('检测到未登录,是否采取扫码登录? (Y)/N ').strip().upper()
            if answer not in ('', 'Y'):
                # Without a BAIDUID there is nothing to derive `logid` from.
                raise TransferError('未登录:请提供包含 BAIDUID 的 Cookie,或选择扫码登录')
            # Drop the unusable Cookie but keep the remaining headers
            # (e.g. a custom User-Agent) for the login and later requests.
            headers.pop('Cookie', None)
            session.headers.update(headers)
            self.login_by_qrcode()
            cookiejar = session.cookies
        self.logid = b64encode(cookiejar['BAIDUID'].encode('ascii')).decode()

    def login_by_qrcode(self):
        '''Log in by scanning a QR code with the mobile app.

        Saves the QR image as `qrimg.png` in the working directory, asks
        the OS to open it, then polls the login status until the scan is
        confirmed.

        :raise TransferError: if the user cancels the scan
        :raise Errno: if an endpoint reports an unexpected `errno`
        '''
        session = self.session
        gid = str(uuid4()).upper()

        msg_getqrcode = self._fetch_json(
            'GET',
            'https://passport.baidu.com/v2/api/getqrcode',
            params={
                'lp': 'pc',
                'qrloginfrom': 'pc',
                'gid': gid,
                'apiver': 'v3',
                'tpl': 'netdisk',
            },
        )
        imgurl = 'https://' + msg_getqrcode['imgurl']
        channel_id = msg_getqrcode['sign']

        # `with` guarantees the image is flushed and closed before opening it.
        with open('qrimg.png', 'wb') as img_file:
            img_file.write(self._fetch('GET', imgurl))
        startfile('qrimg.png')

        def query_login_status():
            with session.get(
                'https://passport.baidu.com/channel/unicast',
                params={
                    'channel_id': channel_id,
                    'tpl': 'netdisk',
                    'gid': gid,
                    'apiver': 'v3',
                },
            ) as resp:
                resp.raise_for_status()
                return resp.json()

        while True:
            print('请扫码或点击登录!')
            msg = query_login_status()
            errno = msg['errno']
            if errno == 1:
                # Nothing happened yet: poll again.
                continue
            if errno != 0:
                raise Errno(msg)
            channel_v = msg['channel_v']
            if isinstance(channel_v, str):
                channel_v = loads(channel_v)
            status = channel_v['status']
            if status == 0:
                print('成功扫码登录')
                break
            elif status == 1:
                print('扫码成功')
            elif status == 2:
                raise TransferError('取消扫码登录')

        # Exchange the temporary token for session cookies.
        with session.get(
            'https://passport.baidu.com/v3/login/main/qrbdusslogin?bduss=' + channel_v['v']
        ) as resp:
            resp.raise_for_status()

    def _fetch(self, *args, **kwargs):
        'Send a request via the session and return the response body as bytes.'
        with self.session.request(*args, **kwargs) as resp:
            resp.raise_for_status()
            return resp.content

    def _fetch_json(self, *args, **kwargs):
        '''Send a request via the session and return the decoded JSON body.

        :raise Errno: if the body's `errno` field is not 0
        '''
        with self.session.request(*args, **kwargs) as resp:
            resp.raise_for_status()
            msg = resp.json()
            if msg['errno'] != 0:
                raise Errno(msg)
            return msg

    @classmethod
    def _parse_shorturl(cls, url_or_shorturl: str) -> str:
        'Extract the short URL id from a share link; other input is returned as is.'
        for pattern in cls._SHARE_URL_PATTERNS:
            match = pattern.fullmatch(url_or_shorturl)
            if match:
                return match['shorturl']
        return url_or_shorturl

    def verify(
        self,
        url_or_shorturl: str,
        code: str,
        bdstoken: str = '',
    ) -> dict:
        '''Submit the extraction code of a protected share.

        On success the returned `randsk` is stored in the session as the
        `BDCLND` cookie, which `transfer` later reads.

        :param url_or_shorturl: share link, or just its short URL id
        :param code: extraction code
        :param bdstoken: token taken from the share page

        :return: JSON message returned by the verify endpoint
        :raise Errno: if the endpoint reports a non-zero `errno`
        '''
        shorturl = self._parse_shorturl(url_or_shorturl)
        msg = self._fetch_json(
            'POST',
            'https://pan.baidu.com/share/verify',
            params={
                'surl': shorturl,
                't': int(time() * 1000),
                'channel': 'chunlei',
                'web': 1,
                'app_id': 250528,
                'bdstoken': bdstoken,
                'logid': self.logid,
                'clienttype': 0,
            },
            data={'pwd': code},
            headers={'Referer': 'https://pan.baidu.com/share/init?surl=' + shorturl},
        )
        self.session.cookies.set('BDCLND', msg['randsk'], domain='')
        return msg

    @staticmethod
    def _extract_data(
        html_content: bytes,
        _cre=re_compile(br'locals\.mset\((.*?)\);'),
    ) -> dict:
        '''Extract the JSON object passed to `locals.mset(...)` in a page.

        :raise TransferError: if the page does not contain such a call
        '''
        match = _cre.search(html_content)
        # No match: the page may have failed to load, been rate limited, etc.
        if match is None:
            raise TransferError('没有提取到页面相关数据,可能是没有登录、链接失效、分享被取消等原因')
        return loads(match[1])

    def transfer(
        self,
        link: str,
        code: Optional[str] = None,
        save_folder: str = '/',
        ensure_save_folder: bool = False,
    ) -> dict:
        '''Save the files of a share link into the netdisk.

        :param link: share link
        :param code: extraction code, optional
        :param save_folder: destination folder, `/` (the root) by default
        :param ensure_save_folder: if True, create `save_folder` first

        :return: JSON message returned by the transfer endpoint
        :raise TransferError: if a code is required but missing, or the page
            data or file list cannot be found
        :raise Errno: if an endpoint reports a non-zero `errno`
        '''
        if ensure_save_folder:
            self.create_folder(save_folder)
        # An http link would be redirected to its https page anyway.
        if link.startswith('http://'):
            link = 'https' + link[4:]

        html_content = self._fetch('GET', link)
        data = self._extract_data(html_content)
        # A verify form in the page means the extraction code is required.
        if b'"verify-form"' in html_content:
            if code is None:
                raise TransferError('需要密码')
            self.verify(link, code, data['bdstoken'])
            html_content = self._fetch('GET', link)
            data = self._extract_data(html_content)

        file_list = data.get('file_list')
        # No file list: the share has probably expired or been cancelled.
        if not file_list:
            raise TransferError('没有找到下载文件,可能是链接失效、分享被取消等原因')
        fsidlist = [f['fs_id'] for f in file_list]

        return self._fetch_json(
            'POST',
            'https://pan.baidu.com/share/transfer',
            params={
                'shareid': data['shareid'],
                'from': data['share_uk'],
                'sekey': unquote(self.session.cookies.get('BDCLND', '', domain='')),
                # NOTE(review): presumably keeps both copies on a name clash - confirm
                'ondup': 'newcopy',
                'async': 1,
                'channel': 'chunlei',
                'web': 1,
                'app_id': 250528,
                'bdstoken': data['bdstoken'],
                'logid': self.logid,
                'clienttype': 0,
            },
            # urlencode() serialises the list with str(), e.g. "[1, 2]".
            data=urlencode({'fsidlist': fsidlist, 'path': save_folder}),
            headers={
                'Referer': link,
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            },
        )

    def create_folder(self, path):
        '''Create a folder in the netdisk.

        Nested paths such as /a/b/c can be created in one call, like
        `mkdir -p`.

        :return: JSON message returned by the create endpoint
        :raise Errno: if the endpoint reports a non-zero `errno`
        '''
        # The home page carries the bdstoken the create endpoint needs.
        html_content = self._fetch('GET', 'https://pan.baidu.com/disk/home')
        data = self._extract_data(html_content)
        return self._fetch_json(
            'POST',
            'https://pan.baidu.com/api/create',
            params={
                'a': 'commit',
                'channel': 'chunlei',
                'web': 1,
                'app_id': 250528,
                'logid': self.logid,
                'bdstoken': data['bdstoken'],
                'clienttype': 0,
            },
            data=urlencode({
                'path': path,
                'isdir': 1,
                'block_list': [],
            }),
            headers={
                'Referer': 'https://pan.baidu.com/disk/home?',
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            },
        )

def extract_share_links(
    path,
    _cre=re_compile('([0-9a-zA-Z]{4})[\t ]*$'),
):
    '''Collect share links and their extraction codes from a text file.

    Every non-blank line containing "http" yields a link: everything from
    the first "http" to the end of the line, trailing whitespace removed.
    The next non-blank line is then searched for a code, i.e. 4
    alphanumeric characters at the end of the line.

    :param path: path of the text file

    :return: list of dicts with the key 'link' and, when a code was found,
        the key 'code'
    '''
    link_list = []
    # The link still waiting for its code line, if any.
    pending = None
    with open(path) as f:
        for row in f:
            if not row.strip():
                continue
            if 'http' in row:
                pending = {'link': row[row.index('http'):].rstrip()}
                link_list.append(pending)
                continue
            if pending is not None:
                match_pwd = _cre.search(row)
                if match_pwd is not None:
                    pending['code'] = match_pwd[1]
                # Only the line right after a link may hold its code.
                pending = None
    return link_list


def main(argv=None):
    '''Command-line entry point: batch-transfer the links listed in a file.

    :param argv: argument list; None makes argparse read `sys.argv`
    '''
    # [] TODO: support create/read/update/delete of netdisk files
    # [] TODO: give richer feedback on why a transfer failed
    # [] TODO: if transferring many files fails, retry in smaller batches
    # [] TODO: if the destination folder does not exist, create it first
    from argparse import ArgumentParser, RawTextHelpFormatter

    ap = ArgumentParser(
        description='百度网盘文件批量转存 (如果检测到没有登录信息,会提示扫码登录)',
        formatter_class=RawTextHelpFormatter,
    )
    ap.add_argument('-p', '--path', default=None,
                    help='必选,存有下载链接的文本文件路径\n'
                         '1. 文本里面链接和密码不要在同一行\n'
                         '2. 文本里面可以有多余的文字,目前支持一定程度的模糊匹配\n'
                         '3. 链接所在的那一行,链接后面除了空格外不要有多余文字\n')
    ap.add_argument('-s', '--save_folder', default='/',
                    help='可选,在百度网盘中的存储路径,默认存储在根目录')
    ap.add_argument('-c', '--cookies', default=None, help='可选,设置 Cookie')
    ap.add_argument('-e', '--errors', default='raise', choices=('raise', 'ignore'),
                    help='可选,遇到错误的处理方式:\n'
                         ' raise: 报错然后停止程序,默认;\n'
                         ' ignore: 忽略而后进行下一个')
    ap.add_argument('-H', '--header', help='可选,请求头')

    args = ap.parse_args(argv)
    if args.path is None:
        # No input file: print the help text and exit.
        ap.parse_args(['-h'])

    # Command-line values come after the built-in ones, so on a duplicate
    # key (such as Cookie) the command-line value wins when parsed.
    headers = HEADERS
    if args.header is not None:
        headers = headers + '\n' + args.header
    if args.cookies is not None:
        headers = headers + '\nCookie: ' + args.cookies

    share_links = extract_share_links(args.path)

    pt = DuPanTransfer(headers=headers)
    for share_link in share_links:
        try:
            msg = pt.transfer(**share_link, save_folder=args.save_folder)
        except Exception as exc:
            # 'raise' stops at the first failure; 'ignore' reports it and
            # carries on with the next link.
            if args.errors != 'ignore':
                raise
            print('[失败]', share_link, ':', repr(exc))
        else:
            print('[成功]', share_link, ':', msg)


if __name__ == '__main__':
    main()

往期推荐

Python企业级全技术栈开发 视频教程

Python项目开发案例集锦(全彩版+代码+资源)

Python脚本批量下载创世纪图书馆电子书

python 实现超时退出

Python编程快速上手(第2版)让繁琐工作自动化

继续滑动看下一个

您可能也对以下帖子感兴趣

文章有问题?点此查看未经处理的缓存