网络编程
位置:首页>> 网络编程>> Python编程>> Python爬虫破解登陆哔哩哔哩的方法

Python爬虫破解登陆哔哩哔哩的方法

作者:꧁༺北海以北的等待༻꧂  发布时间:2021-03-28 14:17:40 

标签:Python,爬虫,破解,登陆

写在前面

对于一名找不到工作的爬虫菜鸡来说,登录这一块肯定是个比较大的难题。
 从今天开始,准备一点点对大型网站逐个进行登录破解,提高自己的爬虫水平。

环境搭建

  • Python 3.7.7环境,Mac电脑测试

  • Python内置库

  • 第三方库:rsa、requests(urllib 属于 Python 内置库,无需另外安装)

PC端登陆

全部代码:


class Bilibili_For_PC():
    """Log in to Bilibili through the passport OAuth2 API using PC parameters.

    Keyword arguments passed to the constructor are stored as instance
    attributes, after which a ``requests.Session`` is created and the URLs
    and headers used by :meth:`login` are initialised.
    """

    # App key sent with both the getKey request and the login form.
    APPKEY = '1d8b6e7d45233436'
    # Domain under which the login cookies are stored in the session. It has
    # to be a suffix of the hosts that are requested later; the previous value
    # '.bilibili' matches no '*.bilibili.com' host, so the cookies were never
    # sent back.
    COOKIE_DOMAIN = '.bilibili.com'

    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)
        self.session = requests.Session()
        self.__initialize()

    def login(self, username, password, crack_captcha_func=None, **kwargs):
        """Log in with ``username``/``password`` and return the session.

        Args:
            username: Account name; URL-quoted into the login form.
            password: Plain-text password; it is prefixed with the hash from
                the getKey endpoint and RSA-encrypted before being sent.
            crack_captcha_func: Accepted for interface compatibility but not
                used; captchas are always sent to ``self.crack_captcha_url``.
            **kwargs: ``proxies`` (dict), if present, is merged into the
                session's proxy settings.

        Returns:
            A ``(infos, session)`` tuple. ``infos`` is the login response
            JSON plus a ``'username'`` key; ``session`` is the
            ``requests.Session`` carrying the login cookies.

        Raises:
            RuntimeError: If the server answers with code -629 (wrong
                username or password) or any other unexpected code.
        """
        # Apply proxies when the caller supplied them.
        self.session.proxies.update(kwargs.get('proxies', {}))
        captcha = ''
        is_need_captcha = False
        # NOTE: the loop repeats for as long as the server keeps answering
        # -105 (captcha required); there is no retry limit.
        while True:
            if is_need_captcha:
                captcha = self.__solve_captcha()
            key_hash, pub_key = self.__fetch_key()
            encrypted_password = base64.b64encode(
                rsa.encrypt('{}{}'.format(key_hash, password).encode(), pub_key)
            )
            data = self._build_login_payload(username, encrypted_password, captcha)
            response = self.session.post(self.login_url, data=data, headers=self.login_headers)
            response_json = response.json()
            code = response_json['code']
            # Login succeeded: copy the returned cookies into the session.
            if code == 0 and response_json['data']['status'] == 0:
                for cookie in response_json['data']['cookie_info']['cookies']:
                    self.session.cookies.set(cookie['name'], cookie['value'], domain=self.COOKIE_DOMAIN)
                print('[INFO]: Account -> %s, login successfully' % username)
                infos_return = {'username': username}
                infos_return.update(response_json)
                return infos_return, self.session
            # The server demands a captcha: solve one on the next iteration.
            elif code == -105:
                is_need_captcha = True
            # Wrong username or password.
            elif code == -629:
                raise RuntimeError('Account -> %s, fail to login, username or password error' % username)
            # Any other error.
            else:
                raise RuntimeError(response_json.get('message'))

    def _build_login_payload(self, username, encrypted_password, captcha='', timestamp=None):
        """Return the signed, form-encoded body of the login request.

        Args:
            username: Account name (URL-quoted here).
            encrypted_password: Base64 of the RSA-encrypted password
                (URL-quoted here).
            captcha: Captcha text, inserted verbatim; empty when no captcha
                is required.
            timestamp: Value of the ``ts`` field; defaults to the current
                Unix time in whole seconds.
        """
        if timestamp is None:
            timestamp = int(time.time())
        payload = (
            "access_key=&actionKey=appkey&appkey={}&build=6040500&captcha={}"
            "&challenge=&channel=bili&cookies=&device=pc&password={}"
            "&permission=ALL&seccode=&subid=1&ts={}&username={}&validate="
        ).format(
            self.APPKEY,
            captcha,
            urllib.parse.quote_plus(encrypted_password),
            timestamp,
            urllib.parse.quote_plus(username),
        )
        # The signature covers the whole body and is appended as the last field.
        return "{}&sign={}".format(payload, self.__calcSign(payload))

    def __solve_captcha(self):
        """Download a captcha image and return the text the solver reports."""
        captcha_img = self.session.get(self.captcha_url, headers=self.captcha_headers).content
        data = {'image': base64.b64encode(captcha_img).decode('utf-8')}
        return self.session.post(self.crack_captcha_url, json=data).json()['message']

    def __fetch_key(self):
        """Request the login hash and RSA public key; return ``(hash, key)``."""
        data = {
            'appkey': self.APPKEY,
            'sign': self.__calcSign('appkey={}'.format(self.APPKEY)),
        }
        response_json = self.session.post(self.getkey_url, data=data).json()
        key_hash = response_json['data']['hash']
        pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(response_json['data']['key'].encode('utf-8'))
        return key_hash, pub_key

    def __calcSign(self, param, salt="560c52ccd288fed045859ed18bffd973"):
        """Return the hex MD5 digest of ``param`` followed by ``salt``."""
        sign = hashlib.md5('{}{}'.format(param, salt).encode('utf-8'))
        return sign.hexdigest()

    def __initialize(self):
        """Set the request headers, endpoint URLs and session User-Agent."""
        # Headers for the login request.
        self.login_headers = {'Content-type': 'application/x-www-form-urlencoded'}
        # Headers for fetching the captcha image.
        self.captcha_headers = {'Host': 'passport.bilibili.com'}
        # Endpoint returning the hash and RSA public key.
        self.getkey_url = 'https://passport.bilibili.com/api/oauth2/getKey'
        # Login endpoint.
        self.login_url = 'https://passport.bilibili.com/api/v3/oauth2/login'
        # Captcha image endpoint.
        self.captcha_url = 'https://passport.bilibili.com/captcha'
        # Captcha-solving service, from https://github.com/Hsury/Bilibili-Toolkit
        self.crack_captcha_url = 'https://bili.dev:2233/captcha'
        # Every request in this session carries this User-Agent.
        self.session.headers.update({'User-Agent': "Mozilla/5.0 BiliDroid/5.51.1 (bbcallen@gmail.com)"})

移动端登陆

移动端与PC端类似,接口URL和请求头相同,区别在于appkey、签名盐值以及device等请求参数。在此不过多介绍。
 全部代码:


class Bilibili_For_Mobile():
    """Log in to Bilibili through the passport OAuth2 API using mobile parameters.

    Keyword arguments passed to the constructor are stored as instance
    attributes, after which a ``requests.Session`` is created and the URLs
    and headers used by :meth:`login` are initialised.
    """

    # App key sent with both the getKey request and the login form.
    APPKEY = 'bca7e84c2d947ac6'
    # Domain under which the login cookies are stored in the session. It has
    # to be a suffix of the hosts that are requested later; the previous value
    # '.bilibili' matches no '*.bilibili.com' host, so the cookies were never
    # sent back.
    COOKIE_DOMAIN = '.bilibili.com'

    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)
        self.session = requests.Session()
        self.__initialize()

    def login(self, username, password, crack_captcha_func=None, **kwargs):
        """Log in with ``username``/``password`` and return the session.

        Args:
            username: Account name; URL-quoted into the login form.
            password: Plain-text password; it is prefixed with the hash from
                the getKey endpoint and RSA-encrypted before being sent.
            crack_captcha_func: Accepted for interface compatibility but not
                used; captchas are always sent to ``self.crack_captcha_url``.
            **kwargs: ``proxies`` (dict), if present, is merged into the
                session's proxy settings.

        Returns:
            A ``(infos, session)`` tuple. ``infos`` is the login response
            JSON plus a ``'username'`` key; ``session`` is the
            ``requests.Session`` carrying the login cookies.

        Raises:
            RuntimeError: If the server answers with code -629 (wrong
                username or password) or any other unexpected code.
        """
        # Apply proxies when the caller supplied them.
        self.session.proxies.update(kwargs.get('proxies', {}))
        captcha = ''
        is_need_captcha = False
        # NOTE: the loop repeats for as long as the server keeps answering
        # -105 (captcha required); there is no retry limit.
        while True:
            if is_need_captcha:
                captcha = self.__solve_captcha()
            key_hash, pub_key = self.__fetch_key()
            encrypted_password = base64.b64encode(
                rsa.encrypt('{}{}'.format(key_hash, password).encode(), pub_key)
            )
            data = self._build_login_payload(username, encrypted_password, captcha)
            response = self.session.post(self.login_url, data=data, headers=self.login_headers)
            response_json = response.json()
            code = response_json['code']
            # Login succeeded: copy the returned cookies into the session.
            if code == 0 and response_json['data']['status'] == 0:
                for cookie in response_json['data']['cookie_info']['cookies']:
                    self.session.cookies.set(cookie['name'], cookie['value'], domain=self.COOKIE_DOMAIN)
                print('[INFO]: Account -> %s, login successfully' % username)
                infos_return = {'username': username}
                infos_return.update(response_json)
                return infos_return, self.session
            # The server demands a captcha: solve one on the next iteration.
            elif code == -105:
                is_need_captcha = True
            # Wrong username or password.
            elif code == -629:
                raise RuntimeError('Account -> %s, fail to login, username or password error' % username)
            # Any other error.
            else:
                raise RuntimeError(response_json.get('message'))

    def _build_login_payload(self, username, encrypted_password, captcha='', timestamp=None):
        """Return the signed, form-encoded body of the login request.

        Args:
            username: Account name (URL-quoted here).
            encrypted_password: Base64 of the RSA-encrypted password
                (URL-quoted here).
            captcha: Captcha text, inserted verbatim; empty when no captcha
                is required.
            timestamp: Value of the ``ts`` field; defaults to the current
                Unix time in whole seconds.
        """
        if timestamp is None:
            timestamp = int(time.time())
        payload = (
            "access_key=&actionKey=appkey&appkey={}&build=6040500&captcha={}"
            "&challenge=&channel=bili&cookies=&device=phone&mobi_app=android&password={}"
            "&permission=ALL&platform=android&seccode=&subid=1&ts={}&username={}&validate="
        ).format(
            self.APPKEY,
            captcha,
            urllib.parse.quote_plus(encrypted_password),
            timestamp,
            urllib.parse.quote_plus(username),
        )
        # The signature covers the whole body and is appended as the last field.
        return "{}&sign={}".format(payload, self.__calcSign(payload))

    def __solve_captcha(self):
        """Download a captcha image and return the text the solver reports."""
        captcha_img = self.session.get(self.captcha_url, headers=self.captcha_headers).content
        data = {'image': base64.b64encode(captcha_img).decode('utf-8')}
        return self.session.post(self.crack_captcha_url, json=data).json()['message']

    def __fetch_key(self):
        """Request the login hash and RSA public key; return ``(hash, key)``."""
        data = {
            'appkey': self.APPKEY,
            'sign': self.__calcSign('appkey={}'.format(self.APPKEY)),
        }
        response_json = self.session.post(self.getkey_url, data=data).json()
        key_hash = response_json['data']['hash']
        pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(response_json['data']['key'].encode('utf-8'))
        return key_hash, pub_key

    def __calcSign(self, param, salt="60698ba2f68e01ce44738920a0ffe768"):
        """Return the hex MD5 digest of ``param`` followed by ``salt``."""
        sign = hashlib.md5('{}{}'.format(param, salt).encode('utf-8'))
        return sign.hexdigest()

    def __initialize(self):
        """Set the request headers, endpoint URLs and session User-Agent."""
        # Headers for the login request.
        self.login_headers = {
            'Content-type': 'application/x-www-form-urlencoded'
        }
        # Headers for fetching the captcha image.
        self.captcha_headers = {
            'Host': 'passport.bilibili.com'
        }
        # Endpoint returning the hash and RSA public key.
        self.getkey_url = 'https://passport.bilibili.com/api/oauth2/getKey'
        # Login endpoint.
        self.login_url = 'https://passport.bilibili.com/api/v3/oauth2/login'
        # Captcha image endpoint.
        self.captcha_url = 'https://passport.bilibili.com/captcha'
        # Captcha-solving service, from https://github.com/Hsury/Bilibili-Toolkit
        self.crack_captcha_url = 'https://bili.dev:2233/captcha'
        # Every request in this session carries this User-Agent.
        self.session.headers.update({'User-Agent': "Mozilla/5.0 BiliDroid/5.51.1 (bbcallen@gmail.com)"})

来源:https://blog.csdn.net/qq_45414559/article/details/109726283

0
投稿

猜你喜欢

手机版 网络编程 asp之家 www.aspxhome.com