# -*- coding: utf-8 -*-
# 简单爬虫 (Simple crawler)

import cookielib

import urllib2
# Simple crawler demo: fetch the same page three ways with urllib2 and, for
# each response, print the HTTP status code and the length of the body.
# NOTE: the printed labels are non-ASCII literals, so under Python 2 the file
# must carry a source-encoding declaration on its first or second line.


def _report(response):
    """Print the status code and body length of ``response``, then close it.

    ``response`` is the object returned by ``urllib2.urlopen``. It is closed
    in ``finally`` so the connection is released even if reading fails.
    """
    try:
        # Status code; 200 means success.
        print(response.getcode())
        # Length of the page content.
        print(len(response.read()))
    finally:
        response.close()


url = "http://www.baidu.com"

# Method 1: open the URL directly.
response1 = urllib2.urlopen(url)
print("第一种方法")
_report(response1)

# Method 2: wrap the URL in a Request so a header can be attached.
print("第二种方法")
request = urllib2.Request(url)
# Present the crawler as a Mozilla browser.
request.add_header("user-agent", "Mozilla/5.0")
response2 = urllib2.urlopen(request)
_report(response2)

# Method 3: install an opener that stores cookies in a CookieJar.
print("第三种方法")
cookie = cookielib.CookieJar()
# Give urllib2 the ability to handle cookies.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
urllib2.install_opener(opener)
response3 = urllib2.urlopen(url)
_report(response3)
# Show whatever cookies the jar collected during the request.
print(cookie)