真实代码爬虫场景
自动化场景和爬虫场景非常相似:二者都是从网页中获取信息、拾取信息。
但实际利用 Python 代码进行爬虫时,是直接向网站服务器发送请求来获取信息,这与设计器获取信息的原理有所不同。
以下是一个实际的爬虫代码案例:目标是获取游戏英雄信息,获取成功之后,选择一个英雄,再获取该英雄的出装策略。
from urllib.request import urlretrieve
import requests
import os
def hero_list(url, header):
    """Print a welcome banner, then every hero's name and ID, three per row.

    The endpoint is expected to answer with JSON whose 'list' entry is a
    sequence of objects carrying 'name' and 'hero_id'.

    Parameters:
        url - GET request address of the hero-list endpoint
        header - dict of HTTP headers sent with the request

    Raises:
        requests.RequestException - on timeout, connection failure or a
            non-success HTTP status
    """
    print('*' * 100)
    print('\t\t\t\t欢迎使用《王者荣耀》出装下助手!')
    print('*' * 100)
    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.get(url=url, headers=header, timeout=10)
    # Fail with a clear HTTP error instead of an obscure JSON/KeyError later.
    response.raise_for_status()
    heroes = response.json()['list']
    for position, hero in enumerate(heroes, start=1):
        print('%s的ID为:%-7s' % (hero['name'], hero['hero_id']), end='\t\t')
        # Break the line after every third hero to form a three-column table.
        if position % 3 == 0:
            print()
def seek_weapon(equip_id, weapon_info):
    """Look up a weapon's name and price by its equip_id.

    Both the requested ID and each catalogue ID are compared as strings, so
    an integer ID matches its string form and vice versa.

    Parameters:
        equip_id - ID of the weapon to find (str or int)
        weapon_info - iterable of weapon dicts, each with the keys
            'equip_id', 'name' and 'price'

    Returns:
        (weapon_name, weapon_price) for the first matching weapon, or None
        when no weapon in weapon_info has that ID.
    """
    wanted = str(equip_id)
    for weapon in weapon_info:
        if str(weapon['equip_id']) == wanted:
            return weapon['name'], weapon['price']
    # Explicit "not found" result; callers must check before unpacking.
    return None
def hero_info(url, header, weapon_info):
    """Fetch one hero's details and print the recommended equipment builds.

    Prints the hero's history intro, then for every entry of 'equip_choice'
    its title, description, each item as name:price (three per row) and the
    total price of the build.

    Parameters:
        url - GET request address of the hero-detail endpoint
        header - dict of HTTP headers sent with the request
        weapon_info - iterable of weapon dicts, passed to seek_weapon to
            resolve each equip_id into a name and price

    Raises:
        requests.RequestException - on timeout, connection failure or a
            non-success HTTP status
    """
    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.get(url=url, headers=header, timeout=10)
    response.raise_for_status()
    info = response.json()['info']
    print('\n历史上的%s:\n %s' % (info['name'], info['history_intro']))
    for build in info['equip_choice']:
        print('\n%s:\n %s' % (build['title'], build['description']))
        total_price = 0
        for position, item in enumerate(build['list'], start=1):
            found = seek_weapon(item['equip_id'], weapon_info)
            if found is None:
                # seek_weapon yields None for an ID missing from the
                # catalogue; show the raw ID instead of crashing on unpack.
                print('%s:%s' % (item['equip_id'], '?'), end='\t')
            else:
                weapon_name, weapon_price = found
                print('%s:%s' % (weapon_name, weapon_price), end='\t')
                total_price += int(weapon_price)
            # Break the line after every third item.
            if position % 3 == 0:
                print()
        print('神装套件价格共计:%d' % total_price)
def hero_weapon(url, header):
    """Fetch the weapon catalogue.

    Parameters:
        url - GET request address of the equipment-list endpoint
        header - dict of HTTP headers sent with the request

    Returns:
        The value stored under 'list' in the JSON response (the other
        functions in this file iterate it as a sequence of weapon dicts).

    Raises:
        requests.RequestException - on timeout, connection failure or a
            non-success HTTP status
    """
    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.get(url=url, headers=header, timeout=10)
    response.raise_for_status()
    return response.json()['list']
if __name__ == '__main__':
    # Local import: only this script section needs URL escaping.
    from urllib.parse import quote

    headers = {
        'Accept-Charset': 'UTF-8',
        'Accept-Encoding': 'gzip,deflate',
        'User-Agent': 'Dalvik/2.1.0 (Linux; U; Android 6.0.1; MI 5 MIUI/V8.1.6.0.MAACNDI)',
        'X-Requested-With': 'XMLHttpRequest',
        'Content-type': 'application/x-www-form-urlencoded',
        'Connection': 'Keep-Alive',
        'Host': 'gamehelper.gm825.com',
    }

    # All three endpoints share the same host and the same captured client
    # parameters, so they are kept in one place instead of three copies.
    base_url = "http://gamehelper.gm825.com/wzry"
    common_query = (
        "channel_id=90009a&app_id=h9044j&game_id=7622"
        "&game_name=%E7%8E%8B%E8%80%85%E8%8D%A3%E8%80%80"
        "&vcode=12.0.3&version_code=1203"
        "&cuid=2654CC14D2D3894DBF5808264AE2DAD7"
        "&ovr=6.0.1&device=Xiaomi_MI+5&net_type=1"
        "&client_id=1Yfyt44QSqu7PcVdDduBYQ%3D%3D"
        "&info_ms=fBzJ%2BCu4ZDAtl4CyHuZ%2FJQ%3D%3D"
        "&info_ma=XshbgIgi0V1HxXTqixI%2BKbgXtNtOP0%2Fn1WZtMWRWj5o%3D"
        "&mno=0"
        "&info_la=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D"
        "&info_ci=9AChHTMC3uW%2BfY8%2BCFhcFw%3D%3D"
        "&mcc=0&clientversion="
        "&bssid=VY%2BeiuZRJ%2FwaXmoLLVUrMODX1ZTf%2F2dzsWn2AOEM0I4%3D"
        "&os_level=23&os_id=dc451556fc0eeadb"
        "&resolution=1080_1920&dpi=480"
        "&client_ip=192.168.0.198&pdunid=a83d20d8"
    )
    weapon_url = base_url + "/equip/list?" + common_query
    heros_url = base_url + "/hero/list?" + common_query

    hero_list(heros_url, headers)
    print()

    # Strip stray whitespace and reject empty input before building the URL.
    hero_id = input("请输入要查询的英雄ID:").strip()
    if not hero_id:
        raise SystemExit("未输入英雄ID")
    # Percent-encode the user input so characters such as '&' or '#' cannot
    # alter the rest of the query string.
    hero_url = (
        base_url
        + "/hero/detail?hero_id={}&".format(quote(hero_id, safe=''))
        + common_query
    )

    weapon_info_dict = hero_weapon(weapon_url, headers)
    hero_info(hero_url, headers, weapon_info_dict)
以上代码在配置好 Python 环境(需要安装 requests 库)之后可以直接运行。大家有兴趣可以看看运行效果,比较一下实际爬虫与设计器运行过程的效率。真正的大型爬虫项目,对于并发性和效率要求更高。
感谢分享。