Python3 编写简单的爬虫 - 木子才的博客

需要安装：

pip3 install requests
pip3 install beautifulsoup4
pip3 install lxml

#!/usr/bin/env python3
#coding: UTF-8

import requests
from bs4 import BeautifulSoup

# Step 1: fetch the page.
link = "http://www.santostang.com/"
# Pass a browser User-Agent through requests' headers so the request looks
# like it comes from a regular browser.
headers = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.117 Safari/537.36'}
# requests has no default timeout; without one an unresponsive server would
# block this script forever.
r = requests.get(link, headers=headers, timeout=10)
# Stop on 4xx/5xx responses instead of parsing an error page as if it were
# the blog's front page.
r.raise_for_status()
# print(r.text)  # uncomment to inspect the raw HTML while debugging

# Step 2: extract the data we need.
soup = BeautifulSoup(r.text, "lxml")
heading = soup.find("h1", class_="post-title")
# find() returns None when nothing matches, and .a is None when the heading
# contains no link; check both so a layout change produces a clear error
# rather than an AttributeError on None.
if heading is None or heading.a is None:
    raise RuntimeError('no <h1 class="post-title"> link found in the page')
title = heading.a.text.strip()
print(title)

# Step 3: store the data.
# Append mode keeps titles written by earlier runs. The encoding is given
# explicitly because the title may contain non-ASCII text and the platform
# default encoding is not guaranteed to be able to represent it.
with open('title.txt', 'a', encoding='utf-8') as f:
    # One title per line, so repeated runs do not run together.
    f.write(title + '\n')
# No explicit close(): the with-statement closes the file on exit.

title.txt 文件将保存到运行脚本时的当前工作目录下（在 py 文件所在目录中运行时，即与 py 文件同目录）。