关于物流查询的一点小爱好
前段时间,苦思冥想打算写点爬虫消磨消磨时间,后来写了一个爬取天气信息的爬虫机器人,但是现在谁的手机没有天气预报啊,汗😅。于是乎,我打算研究一个能够爬取物流信息的机器人,帮助企业人员更好地查询物流信息。废话不多说,直接上代码。
import requests
import json
import re
from xpinyin import Pinyin
class Get_express_message():
    """Query parcel tracking records from kuaidi100.com and append them to a text file.

    Typical use is ``Get_express_message().run(company, number)``, which
    fetches the records, formats them, writes them to ``OUTPUT_PATH`` and
    returns the formatted lines.
    """

    # File that save_data() appends to. Override on the class or on an
    # instance to write somewhere else.
    OUTPUT_PATH = r'C:\Users\Administrator\Desktop\快递查询信息.txt'

    # Seconds to wait for each HTTP request; without a timeout, requests
    # can block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        # Browser-like headers sent with the tracking query.
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36',
            'Host': 'www.kuaidi100.com',
            'Referer': 'https://www.kuaidi100.com/?from=openv',
        }
        # Landing page that is requested first in order to collect cookies.
        self.url = 'https://www.kuaidi100.com/?from=openv'

    def pinyin(self, hanzi):
        """Return the company code used in the query URL for a courier name.

        Args:
            hanzi: Courier name, e.g. ``'圆通'`` or ``'EMS'``.

        Returns:
            ``'ems'`` for ``'EMS'``, ``'huitongkuaidi'`` for any name
            containing ``'百世'`` or ``'汇通'``, otherwise the name converted
            to pinyin with no separator between syllables.
        """
        # EMS and Best Express (Baishi/Huitong) codes are not a direct
        # pinyin transliteration of the name, so they are special-cased.
        if hanzi == 'EMS':
            return 'ems'
        if '百世' in hanzi or '汇通' in hanzi:
            return 'huitongkuaidi'
        return Pinyin().get_pinyin(hanzi, '')

    def get_cookie(self):
        """Fetch the landing page and return the cookies it sets as a dict.

        The cookies are sent along with the tracking query; the original
        author's intent is to avoid being rejected by anti-scraping checks.
        """
        headers = {
            'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
        }
        # The context manager closes the session's connections once the
        # cookies have been copied out.
        with requests.Session() as session:
            session.get(self.url, headers=headers, timeout=self.REQUEST_TIMEOUT)
            # Snapshot of the cookies currently held by the session.
            cookie = dict(session.cookies)
        print(cookie)
        return cookie

    def get_data(self, express_company, tracking_number):
        """Request the tracking records for one parcel.

        Args:
            express_company: Courier name, converted with ``pinyin()``.
            tracking_number: Tracking number inserted into the query URL.

        Returns:
            The value stored under the ``'data'`` key of the JSON response.

        Raises:
            KeyError: If the response JSON has no ``'data'`` key.
        """
        company_name = self.pinyin(express_company)
        url = 'https://www.kuaidi100.com/query?type={}&postid={}&temp=0.6174507801374183&phone='.format(
            company_name, tracking_number)
        print(url)
        res = requests.get(
            url,
            headers=self.headers,
            cookies=self.get_cookie(),
            timeout=self.REQUEST_TIMEOUT,
        )
        result_data = json.loads(res.content.decode())
        return result_data['data']

    def parse_data(self, res_list):
        """Format tracking records as display lines.

        Args:
            res_list: Iterable of mappings with ``'time'`` and ``'context'``
                string entries.

        Returns:
            A list with two items per record: ``'<time>:<context>'``
            followed by a ``'\\r'`` separator item.
        """
        new_list = []
        for record in res_list:
            new_list.append(record['time'] + ':' + record['context'])
            new_list.append('\r')
        return new_list

    def save_data(self, result_list, express_company, tracking_number):
        """Append one query result to the file at ``OUTPUT_PATH``.

        Writes a blank line, a header naming the courier and tracking
        number, each item of ``result_list`` on its own line, and a closing
        rule of 60 dashes.
        """
        # An explicit encoding keeps the Chinese text independent of the
        # platform's default codec; the with-block closes the file.
        with open(self.OUTPUT_PATH, 'a', encoding='utf-8') as f:
            f.write('\n')
            f.write('{}快递,单号为:{},查询结果如下:'.format(express_company, tracking_number))
            f.write('\n')
            for line in result_list:
                f.write(line + '\n')
            f.write('-' * 60)

    def run(self, express_company, tracking_number):
        """Fetch, format and save the tracking records; return the formatted lines."""
        res = self.get_data(express_company, tracking_number)
        result_list = self.parse_data(res)
        self.save_data(result_list, express_company, tracking_number)
        return result_list
# Script entry point: look up one sample parcel and print each formatted
# line returned by run().
if __name__ == '__main__':
    express_company = '圆通'
    tracking_number = 'YT4341648917106'
    get_express_message = Get_express_message()
    result_list = get_express_message.run(express_company, tracking_number)
    for i in result_list:
        print(i)
破费德尔
电影名称、上映时间、导演、主演、大概剧情、该导演豆瓣评分最高的电影评分与分数
你觉得怎么样?
好建议,立刻落地执行 ~~~
哈哈,有点小流弊啊
我觉得写个 每月有哪些电影上映 主演是谁 导演是谁,感觉更流弊