关于物流查询的一点小爱好

前段时间,苦思冥想打算写点爬虫消磨消磨时间,后来写了一个爬取天气信息的爬虫机器人,但是现在谁的手机没有天气预报啊,汗😅。于是乎,我打算研究一个能够爬取物流信息的机器人,帮助企业人员更好地查询物流信息。废话不多说,直接上代码。
import requests
import json
import re
from xpinyin import Pinyin
class Get_express_message():
    """Look up parcel tracking records on kuaidi100.com and save them to a text file.

    Typical use is ``Get_express_message().run(company, number)``, which
    queries the site, formats the returned records and appends them to a
    report file.
    """

    # Seconds to wait for each HTTP request before giving up; without a
    # timeout ``requests`` can block indefinitely on an unresponsive server.
    REQUEST_TIMEOUT = 10

    # Report file used by save_data() when no explicit path is given.
    DEFAULT_OUTPUT_PATH = r'C:\Users\Administrator\Desktop\快递查询信息.txt'

    def __init__(self):
        # Browser-like headers sent with the tracking query.
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36',
            'Host': 'www.kuaidi100.com',
            'Referer': 'https://www.kuaidi100.com/?from=openv',
        }
        # Landing page; get_cookie() requests it to collect session cookies.
        self.url = 'https://www.kuaidi100.com/?from=openv'

    def pinyin(self, hanzi):
        """Return the courier code placed in the query URL.

        Args:
            hanzi: Courier company name, e.g. ``'圆通'`` or ``'EMS'``.

        Returns:
            The courier code as a string. Most names are transliterated to
            pinyin without separators; EMS and Best Express (百世 / 汇通) are
            special-cased because their codes are not plain transliterations.
        """
        if hanzi == 'EMS':
            return 'ems'
        if '百世' in hanzi or '汇通' in hanzi:
            return 'huitongkuaidi'
        return Pinyin().get_pinyin(hanzi, '')

    def get_cookie(self):
        """Fetch the landing page and return the cookies it sets as a dict.

        The cookies are sent along with the tracking query so the request is
        less likely to be rejected by the site's anti-scraping checks.
        """
        headers = {
            'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1'
        }
        # The context manager closes the session's connections once the
        # cookies have been copied out.
        with requests.Session() as session:
            session.get(self.url, headers=headers, timeout=self.REQUEST_TIMEOUT)
            cookie = dict(session.cookies)
        print(cookie)
        return cookie

    def get_data(self, express_company, tracking_number):
        """Query the tracking endpoint and return the ``data`` field of its JSON reply.

        Args:
            express_company: Courier company name, converted with pinyin().
            tracking_number: Waybill number to look up.

        Returns:
            The value stored under ``'data'`` in the decoded response
            (presumably a list of tracking records; verify against the API).

        Raises:
            KeyError: If the response JSON has no ``'data'`` key.
            json.JSONDecodeError: If the response body is not valid JSON.
        """
        company_name = self.pinyin(express_company)
        url = 'https://www.kuaidi100.com/query?type={}&postid={}&temp=0.6174507801374183&phone='.format(
            company_name, tracking_number)
        print(url)
        res = requests.get(
            url,
            headers=self.headers,
            cookies=self.get_cookie(),
            timeout=self.REQUEST_TIMEOUT,
        )
        result_data = json.loads(res.content.decode())
        return result_data['data']

    def parse_data(self, res_list):
        """Format tracking records as ``'<time>:<context>'`` strings.

        Args:
            res_list: Iterable of mappings with ``'time'`` and ``'context'`` keys.

        Returns:
            A list in which every formatted record is followed by a ``'\\r'``
            separator entry.
        """
        new_list = []
        for record in res_list:
            new_list.append(record['time'] + ':' + record['context'])
            new_list.append('\r')
        return new_list

    def save_data(self, result_list, express_company, tracking_number, file_path=None):
        """Append one query result to the report file.

        Args:
            result_list: Lines produced by parse_data().
            express_company: Courier company name written in the header line.
            tracking_number: Waybill number written in the header line.
            file_path: Destination file; defaults to DEFAULT_OUTPUT_PATH.
        """
        if file_path is None:
            file_path = self.DEFAULT_OUTPUT_PATH
        # Explicit UTF-8 so the Chinese text is written the same way on every
        # platform instead of depending on the locale's default encoding.
        # The ``with`` block closes the file, so no explicit close() is needed.
        with open(file_path, 'a', encoding='utf-8') as f:
            f.write('\n')
            f.write('{}快递,单号为:{},查询结果如下:'.format(express_company, tracking_number))
            f.write('\n')
            for line in result_list:
                f.write(line + '\n')
            f.write('-' * 60)

    def run(self, express_company, tracking_number):
        """Fetch, format and save the tracking records; return the formatted lines."""
        res = self.get_data(express_company, tracking_number)
        result_list = self.parse_data(res)
        self.save_data(result_list, express_company, tracking_number)
        return result_list

# Script entry point: look up one sample waybill and print each formatted line.
if __name__ == '__main__':
    express_company = '圆通'
    tracking_number = 'YT4341648917106'
    get_express_message = Get_express_message()
    result_list = get_express_message.run(express_company, tracking_number)
    for line in result_list:
        print(line)